summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/00-INDEX4
-rw-r--r--Documentation/ABI/testing/devlink-resource-mlxsw33
-rw-r--r--Documentation/ABI/testing/evm54
-rw-r--r--Documentation/ABI/testing/ima_policy3
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio14
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm3216
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd25
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-netdev45
-rw-r--r--Documentation/ABI/testing/sysfs-class-net24
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu16
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs6
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-livepatch26
-rw-r--r--Documentation/IRQ-domain.txt36
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.html49
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html7
-rw-r--r--Documentation/RCU/rcu_dereference.txt6
-rw-r--r--Documentation/RCU/stallwarn.txt10
-rw-r--r--Documentation/RCU/whatisRCU.txt3
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt79
-rw-r--r--Documentation/admin-guide/mono.rst6
-rw-r--r--Documentation/arm64/cpu-feature-registers.txt4
-rw-r--r--Documentation/arm64/elf_hwcaps.txt4
-rw-r--r--Documentation/arm64/silicon-errata.txt2
-rw-r--r--Documentation/bpf/bpf_devel_QA.txt519
-rw-r--r--Documentation/cgroup-v1/cgroups.txt7
-rw-r--r--Documentation/cgroup-v1/memory.txt4
-rw-r--r--Documentation/cgroup-v2.txt77
-rw-r--r--Documentation/circular-buffers.txt3
-rw-r--r--Documentation/conf.py1
-rw-r--r--Documentation/core-api/errseq.rst (renamed from Documentation/errseq.rst)20
-rw-r--r--Documentation/core-api/index.rst3
-rw-r--r--Documentation/core-api/kernel-api.rst15
-rw-r--r--Documentation/core-api/printk-formats.rst (renamed from Documentation/printk-formats.txt)227
-rw-r--r--Documentation/core-api/refcount-vs-atomic.rst150
-rw-r--r--Documentation/device-mapper/cache-policies.txt4
-rw-r--r--Documentation/device-mapper/cache.txt9
-rw-r--r--Documentation/device-mapper/dm-raid.txt5
-rw-r--r--Documentation/device-mapper/snapshot.txt4
-rw-r--r--Documentation/device-mapper/thin-provisioning.txt14
-rw-r--r--Documentation/device-mapper/unstriped.txt124
-rw-r--r--Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/sdei.txt42
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt19
-rw-r--r--Documentation/devicetree/bindings/crypto/arm-cryptocell.txt22
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt3
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt4
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt19
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt4
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-axp209.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt2
-rw-r--r--Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt14
-rw-r--r--Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt4
-rw-r--r--Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt7
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt128
-rw-r--r--Documentation/devicetree/bindings/iio/health/max30102.txt8
-rw-r--r--Documentation/devicetree/bindings/iio/light/uvis25.txt23
-rw-r--r--Documentation/devicetree/bindings/input/hid-over-i2c.txt2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt30
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3692x.txt49
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp8860.txt32
-rw-r--r--Documentation/devicetree/bindings/mfd/mc13xxx.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.txt8
-rw-r--r--Documentation/devicetree/bindings/mmc/mtk-sd.txt2
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt1
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-quadspi.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-onenand.txt6
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell-nand.txt123
-rw-r--r--Documentation/devicetree/bindings/mtd/mtk-nand.txt11
-rw-r--r--Documentation/devicetree/bindings/mtd/nand.txt1
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt5
-rw-r--r--Documentation/devicetree/bindings/net/can/can-transceiver.txt24
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl-flexcan.txt6
-rw-r--r--Documentation/devicetree/bindings/net/can/m_can.txt9
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt7
-rw-r--r--Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt92
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt4
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/adf7242.txt2
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-net.txt2
-rw-r--r--Documentation/devicetree/bindings/net/phy.txt12
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt10
-rw-r--r--Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt48
-rw-r--r--Documentation/devicetree/bindings/net/socionext-netsec.txt53
-rw-r--r--Documentation/devicetree/bindings/net/ti-bluetooth.txt (renamed from Documentation/devicetree/bindings/net/ti,wilink-st.txt)18
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt32
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt3
-rw-r--r--Documentation/devicetree/bindings/opp/opp.txt13
-rw-r--r--Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt63
-rw-r--r--Documentation/devicetree/bindings/power/power_domain.txt65
-rw-r--r--Documentation/devicetree/bindings/power/reset/imx-snvs-poweroff.txt23
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq27xxx.txt1
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt2
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.txt12
-rw-r--r--Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt43
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm2835.txt22
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm6368.txt17
-rw-r--r--Documentation/devicetree/bindings/scsi/hisilicon-sas.txt5
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-imx-uart.txt4
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-lpuart.txt3
-rw-r--r--Documentation/devicetree/bindings/serial/ingenic,uart.txt8
-rw-r--r--Documentation/devicetree/bindings/serial/maxim,max310x.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/mvebu-uart.txt50
-rw-r--r--Documentation/devicetree/bindings/serial/omap_serial.txt1
-rw-r--r--Documentation/devicetree/bindings/serial/rs485.txt1
-rw-r--r--Documentation/devicetree/bindings/sound/dmic.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/max98373.txt40
-rw-r--r--Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt266
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt33
-rw-r--r--Documentation/devicetree/bindings/sound/nau8825.txt4
-rw-r--r--Documentation/devicetree/bindings/sound/pcm186x.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/simple-card.txt3
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt63
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-sai.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/sun4i-i2s.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/tas5720.txt4
-rw-r--r--Documentation/devicetree/bindings/sound/tfa9879.txt8
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas6424.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic31xx.txt9
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic3x.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/tscs42xx.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/uniphier,evea.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/sh-msiof.txt16
-rw-r--r--Documentation/devicetree/bindings/spi/spi-meson.txt4
-rw-r--r--Documentation/devicetree/bindings/spi/spi-orion.txt9
-rw-r--r--Documentation/devicetree/bindings/spi/spi-xilinx.txt2
-rw-r--r--Documentation/devicetree/bindings/timer/actions,owl-timer.txt1
-rw-r--r--Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt20
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtu3.txt15
-rw-r--r--Documentation/devicetree/bindings/usb/renesas_usbhs.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/usb-device.txt68
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.txt5
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt1
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt39
-rw-r--r--Documentation/doc-guide/kernel-doc.rst360
-rw-r--r--Documentation/driver-api/basics.rst21
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst38
-rw-r--r--Documentation/driver-api/iio/hw-consumer.rst51
-rw-r--r--Documentation/driver-api/iio/index.rst1
-rw-r--r--Documentation/driver-api/pm/devices.rst54
-rw-r--r--Documentation/driver-api/scsi.rst10
-rw-r--r--Documentation/driver-api/usb/usb3-debug-port.rst52
-rw-r--r--Documentation/driver-api/usb/writing_usb_driver.rst2
-rw-r--r--Documentation/driver-model/devres.txt3
-rw-r--r--Documentation/fault-injection/fault-injection.txt70
-rw-r--r--Documentation/features/debug/KASAN/arch-support.txt2
-rw-r--r--Documentation/features/debug/stackprotector/arch-support.txt2
-rw-r--r--Documentation/filesystems/ext2.txt2
-rw-r--r--Documentation/filesystems/ext4.txt7
-rw-r--r--Documentation/filesystems/nfs/Exporting27
-rw-r--r--Documentation/filesystems/nilfs2.txt4
-rw-r--r--Documentation/filesystems/vfat.txt2
-rw-r--r--Documentation/gpio/board.txt14
-rw-r--r--Documentation/gpio/consumer.txt107
-rw-r--r--Documentation/gpio/driver.txt4
-rw-r--r--Documentation/gpio/sysfs.txt11
-rw-r--r--Documentation/gpu/i915.rst5
-rw-r--r--Documentation/hwmon/lm2506620
-rw-r--r--Documentation/hwmon/max3178515
-rw-r--r--Documentation/hwmon/w83773g33
-rw-r--r--Documentation/i2c/dev-interface17
-rw-r--r--Documentation/index.rst13
-rw-r--r--Documentation/input/multi-touch-protocol.rst9
-rw-r--r--Documentation/kbuild/kconfig-language.txt44
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt322
-rw-r--r--Documentation/kernel-hacking/hacking.rst6
-rw-r--r--Documentation/livepatch/livepatch.txt114
-rw-r--r--Documentation/locking/locktorture.txt5
-rw-r--r--Documentation/maintainer/conf.py10
-rw-r--r--Documentation/maintainer/configure-git.rst34
-rw-r--r--Documentation/maintainer/index.rst14
-rw-r--r--Documentation/maintainer/pull-requests.rst178
-rw-r--r--Documentation/md/raid5-ppl.txt7
-rw-r--r--Documentation/memory-barriers.txt22
-rw-r--r--Documentation/mtd/spi-nor.txt3
-rw-r--r--Documentation/networking/00-INDEX4
-rw-r--r--Documentation/networking/batman-adv.rst2
-rw-r--r--Documentation/networking/can.rst1437
-rw-r--r--Documentation/networking/can.txt1308
-rw-r--r--Documentation/networking/dsa/dsa.txt5
-rw-r--r--Documentation/networking/filter.txt2
-rw-r--r--Documentation/networking/ieee802154.txt40
-rw-r--r--Documentation/networking/index.rst3
-rw-r--r--Documentation/networking/ip-sysctl.txt1
-rw-r--r--Documentation/networking/kapi.rst24
-rw-r--r--Documentation/networking/msg_zerocopy.rst4
-rw-r--r--Documentation/networking/netdev-features.txt9
-rw-r--r--Documentation/networking/pktgen.txt19
-rw-r--r--Documentation/networking/xfrm_device.txt135
-rw-r--r--Documentation/networking/xfrm_proc.txt20
-rw-r--r--Documentation/perf/arm_dsu_pmu.txt28
-rw-r--r--Documentation/power/pci.txt11
-rw-r--r--Documentation/power/regulator/machine.txt36
-rw-r--r--Documentation/process/kernel-enforcement-statement.rst4
-rw-r--r--Documentation/process/license-rules.rst370
-rw-r--r--Documentation/process/submit-checklist.rst4
-rw-r--r--Documentation/security/credentials.rst7
-rw-r--r--Documentation/security/self-protection.rst15
-rw-r--r--Documentation/sparc/oradax/dax-hv-api.txt1433
-rw-r--r--Documentation/sparc/oradax/oracle-dax.txt429
-rw-r--r--Documentation/sphinx/kfigure.py6
-rw-r--r--Documentation/sysctl/kernel.txt17
-rw-r--r--Documentation/sysctl/net.txt4
-rw-r--r--Documentation/sysctl/vm.txt25
-rw-r--r--Documentation/thermal/cpu-cooling-api.txt115
-rw-r--r--Documentation/trace/ftrace-uses.rst60
-rw-r--r--Documentation/usb/chipidea.txt12
-rw-r--r--Documentation/usb/gadget-testing.txt2
-rw-r--r--Documentation/usb/usbip_protocol.txt1
-rw-r--r--Documentation/virtual/kvm/api.txt46
-rw-r--r--Documentation/vm/hugetlbpage.txt27
-rw-r--r--Documentation/vm/hwpoison.txt2
-rw-r--r--Documentation/w1/masters/w1-gpio17
-rw-r--r--Documentation/w1/w1.generic2
-rw-r--r--Documentation/x86/intel_rdt_ui.txt13
-rw-r--r--Documentation/x86/pti.txt186
-rw-r--r--Documentation/x86/x86_64/mm.txt18
-rw-r--r--Documentation/xtensa/mmu.txt78
223 files changed, 8699 insertions, 2796 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 3bec49c33bbb..7f3a0728ccf2 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -228,8 +228,6 @@ isdn/
- directory with info on the Linux ISDN support, and supported cards.
kbuild/
- directory with info about the kernel build process.
-kernel-doc-nano-HOWTO.txt
- - outdated info about kernel-doc documentation.
kdump/
- directory with mini HowTo on getting the crash dump code to work.
doc-guide/
@@ -346,8 +344,6 @@ prctl/
- directory with info on the priveledge control subsystem
preempt-locking.txt
- info on locking under a preemptive kernel.
-printk-formats.txt
- - how to get printk format specifiers right
process/
- how to work with the mainline kernel development process.
pps/
diff --git a/Documentation/ABI/testing/devlink-resource-mlxsw b/Documentation/ABI/testing/devlink-resource-mlxsw
new file mode 100644
index 000000000000..259ed2948ec0
--- /dev/null
+++ b/Documentation/ABI/testing/devlink-resource-mlxsw
@@ -0,0 +1,33 @@
+What: /kvd/
+Date: 08-Jan-2018
+KernelVersion: v4.16
+Contact: mlxsw@mellanox.com
+Description: The main database in the Spectrum device is a centralized
+ KVD database used for many of the tables used to configure
+ the chip including L2 FDB, L3 LPM, ECMP and more. The KVD
+ is divided into two sections, the first is hash-based table
+ and the second is a linear access table. The division
+ between the linear and hash-based sections is static and
+ require reload before the changes take effect.
+
+What: /kvd/linear
+Date: 08-Jan-2018
+KernelVersion: v4.16
+Contact: mlxsw@mellanox.com
+Description: The linear section of the KVD is managed by software as a
+ flat memory accessed using an index.
+
+What: /kvd/hash_single
+Date: 08-Jan-2018
+KernelVersion: v4.16
+Contact: mlxsw@mellanox.com
+Description: The hash based section of the KVD is managed by the switch
+ device. Used in case the key size is smaller or equal to
+ 64bit.
+
+What: /kvd/hash_double
+Date: 08-Jan-2018
+KernelVersion: v4.16
+Contact: mlxsw@mellanox.com
+Description: The hash based section of the KVD is managed by the switch
+ device. Used in case the key is larger than 64 bit.
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 9578247e1792..d12cb2eae9ee 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -14,30 +14,46 @@ Description:
generated either locally or remotely using an
asymmetric key. These keys are loaded onto root's
keyring using keyctl, and EVM is then enabled by
- echoing a value to <securityfs>/evm:
+ echoing a value to <securityfs>/evm made up of the
+ following bits:
- 1: enable HMAC validation and creation
- 2: enable digital signature validation
- 3: enable HMAC and digital signature validation and HMAC
- creation
+ Bit Effect
+ 0 Enable HMAC validation and creation
+ 1 Enable digital signature validation
+ 2 Permit modification of EVM-protected metadata at
+ runtime. Not supported if HMAC validation and
+ creation is enabled.
+ 31 Disable further runtime modification of EVM policy
- Further writes will be blocked if HMAC support is enabled or
- if bit 32 is set:
+ For example:
- echo 0x80000002 ><securityfs>/evm
+ echo 1 ><securityfs>/evm
- will enable digital signature validation and block
- further writes to <securityfs>/evm.
+ will enable HMAC validation and creation
- Until this is done, EVM can not create or validate the
- 'security.evm' xattr, but returns INTEGRITY_UNKNOWN.
- Loading keys and signaling EVM should be done as early
- as possible. Normally this is done in the initramfs,
- which has already been measured as part of the trusted
- boot. For more information on creating and loading
- existing trusted/encrypted keys, refer to:
+ echo 0x80000003 ><securityfs>/evm
- Documentation/security/keys/trusted-encrypted.rst. Both dracut
- (via 97masterkey and 98integrity) and systemd (via
+ will enable HMAC and digital signature validation and
+ HMAC creation and disable all further modification of policy.
+
+ echo 0x80000006 ><securityfs>/evm
+
+ will enable digital signature validation, permit
+ modification of EVM-protected metadata and
+ disable all further modification of policy
+
+ Note that once a key has been loaded, it will no longer be
+ possible to enable metadata modification.
+
+ Until key loading has been signaled EVM can not create
+ or validate the 'security.evm' xattr, but returns
+ INTEGRITY_UNKNOWN. Loading keys and signaling EVM
+ should be done as early as possible. Normally this is
+ done in the initramfs, which has already been measured
+ as part of the trusted boot. For more information on
+ creating and loading existing trusted/encrypted keys,
+ refer to:
+ Documentation/security/keys/trusted-encrypted.rst. Both
+ dracut (via 97masterkey and 98integrity) and systemd (via
core/ima-setup) have support for loading keys at boot
time.
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index e76432b9954d..2028f2d093b2 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -17,7 +17,8 @@ Description:
rule format: action [condition ...]
- action: measure | dont_measure | appraise | dont_appraise | audit
+ action: measure | dont_measure | appraise | dont_appraise |
+ audit | hash | dont_hash
condition:= base | lsm [option]
base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=]
[euid=] [fowner=]]
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 2e3f919485f4..6a5f34b4d5b9 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -32,7 +32,7 @@ Description:
Description of the physical chip / device for device X.
Typically a part number.
-What: /sys/bus/iio/devices/iio:deviceX/timestamp_clock
+What: /sys/bus/iio/devices/iio:deviceX/current_timestamp_clock
KernelVersion: 4.5
Contact: linux-iio@vger.kernel.org
Description:
@@ -1290,7 +1290,7 @@ KernelVersion: 3.4
Contact: linux-iio@vger.kernel.org
Description:
Unit-less light intensity. Modifiers both and ir indicate
- that measurements contains visible and infrared light
+ that measurements contain visible and infrared light
components or just infrared light, respectively. Modifier uv indicates
that measurements contain ultraviolet light components.
@@ -1413,6 +1413,16 @@ Description:
the available samples after the timeout expires and thus have a
maximum delay guarantee.
+What: /sys/bus/iio/devices/iio:deviceX/buffer/data_available
+KernelVersion: 4.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ A read-only value indicating the bytes of data available in the
+ buffer. In the case of an output buffer, this indicates the
+ amount of empty space available to write data to. In the case of
+ an input buffer, this indicates the amount of data available for
+ reading.
+
What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_enabled
KernelVersion: 4.2
Contact: linux-iio@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32
new file mode 100644
index 000000000000..da9822309f07
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32
@@ -0,0 +1,16 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_spi_clk_freq
+KernelVersion: 4.14
+Contact: arnaud.pouliquen@st.com
+Description:
+ For audio purpose only.
+ Used by audio driver to set/get the spi input frequency.
+ This is mandatory if DFSDM is slave on SPI bus, to
+ provide information on the SPI clock frequency during runtime
+ Notice that the SPI frequency should be a multiple of sample
+ frequency to ensure the precision.
+ if DFSDM input is SPI master
+ Reading SPI clkout frequency,
+ error on writing
+ If DFSDM input is SPI Slave:
+ Reading returns value previously set.
+ Writing value before starting conversions. \ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
new file mode 100644
index 000000000000..0088aba4caa8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
@@ -0,0 +1,25 @@
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc
+Date: June 2017
+Contact: Lu Baolu <baolu.lu@linux.intel.com>
+Description:
+ xHCI compatible USB host controllers (i.e. super-speed
+ USB3 controllers) are often implemented with the Debug
+ Capability (DbC). It can present a debug device which
+ is fully compliant with the USB framework and provides
+ the equivalent of a very high performance full-duplex
+ serial link for debug purpose.
+
+ The DbC debug device shares a root port with xHCI host.
+ When the DbC is enabled, the root port will be assigned
+ to the Debug Capability. Otherwise, it will be assigned
+ to xHCI.
+
+ Writing "enable" to this attribute will enable the DbC
+ functionality and the shared root port will be assigned
+ to the DbC device. Writing "disable" to this attribute
+ will disable the DbC functionality and the shared root
+ port will roll back to the xHCI.
+
+ Reading this attribute gives the state of the DbC. It
+ can be one of the following states: disabled, enabled,
+ initialized, connected, configured and stalled.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
new file mode 100644
index 000000000000..451af6d6768c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
@@ -0,0 +1,45 @@
+What: /sys/class/leds/<led>/device_name
+Date: Dec 2017
+KernelVersion: 4.16
+Contact: linux-leds@vger.kernel.org
+Description:
+ Specifies the network device name to monitor.
+
+What: /sys/class/leds/<led>/interval
+Date: Dec 2017
+KernelVersion: 4.16
+Contact: linux-leds@vger.kernel.org
+Description:
+ Specifies the duration of the LED blink in milliseconds.
+ Defaults to 50 ms.
+
+What: /sys/class/leds/<led>/link
+Date: Dec 2017
+KernelVersion: 4.16
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link state of the named network device.
+ If set to 0 (default), the LED's normal state is off.
+ If set to 1, the LED's normal state reflects the link state
+ of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/tx
+Date: Dec 2017
+KernelVersion: 4.16
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal transmission of data on the named network device.
+ If set to 0 (default), the LED will not blink on transmission.
+ If set to 1, the LED will blink for the milliseconds specified
+ in interval to signal transmission.
+
+What: /sys/class/leds/<led>/rx
+Date: Dec 2017
+KernelVersion: 4.16
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal reception of data on the named network device.
+ If set to 0 (default), the LED will not blink on reception.
+ If set to 1, the LED will blink for the milliseconds specified
+ in interval to signal reception.
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 6856da99b6f7..2f1788111cd9 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -259,3 +259,27 @@ Contact: netdev@vger.kernel.org
Description:
Symbolic link to the PHY device this network device is attached
to.
+
+What: /sys/class/net/<iface>/carrier_changes
+Date: Mar 2014
+KernelVersion: 3.15
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ seen a change from UP to DOWN and vice versa
+
+What: /sys/class/net/<iface>/carrier_up_count
+Date: Jan 2018
+KernelVersion: 4.16
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ been up
+
+What: /sys/class/net/<iface>/carrier_down_count
+Date: Jan 2018
+KernelVersion: 4.16
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ been down
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index d6d862db3b5d..bfd29bc8d37a 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -375,3 +375,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: information about CPUs heterogeneity.
cpu_capacity: capacity of cpu#.
+
+What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date: January 2018
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Information about CPU vulnerabilities
+
+ The files are named after the code names of CPU
+ vulnerabilities. The output of those files reflects the
+ state of the CPUs in the system. Possible output values:
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+ "Mitigation: $M" CPU is affected and mitigation $M is in effect
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a7799c2fca28..d870b5514d15 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -186,3 +186,9 @@ Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls sleep time of GC urgent mode
+
+What: /sys/fs/f2fs/<disk>/readdir_ra
+Date: November 2017
+Contact: "Sheng Yong" <shengyong1@huawei.com>
+Description:
+ Controls readahead inode block in readdir.
diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch
index d5d39748382f..dac7e1e62a8b 100644
--- a/Documentation/ABI/testing/sysfs-kernel-livepatch
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@@ -33,6 +33,32 @@ Description:
An attribute which indicates whether the patch is currently in
transition.
+What: /sys/kernel/livepatch/<patch>/signal
+Date: Nov 2017
+KernelVersion: 4.15.0
+Contact: live-patching@vger.kernel.org
+Description:
+ A writable attribute that allows administrator to affect the
+ course of an existing transition. Writing 1 sends a fake
+ signal to all remaining blocking tasks. The fake signal
+ means that no proper signal is delivered (there is no data in
+ signal pending structures). Tasks are interrupted or woken up,
+ and forced to change their patched state.
+
+What: /sys/kernel/livepatch/<patch>/force
+Date: Nov 2017
+KernelVersion: 4.15.0
+Contact: live-patching@vger.kernel.org
+Description:
+ A writable attribute that allows administrator to affect the
+ course of an existing transition. Writing 1 clears
+ TIF_PATCH_PENDING flag of all tasks and thus forces the tasks to
+ the patched or unpatched state. Administrator should not
+ use this feature without a clearance from a patch
+ distributor. Removal (rmmod) of patch modules is permanently
+ disabled when the feature is used. See
+ Documentation/livepatch/livepatch.txt for more information.
+
What: /sys/kernel/livepatch/<patch>/<object>
Date: Nov 2014
KernelVersion: 3.19.0
diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt
index 4a1cd7645d85..507775cce753 100644
--- a/Documentation/IRQ-domain.txt
+++ b/Documentation/IRQ-domain.txt
@@ -265,37 +265,5 @@ support other architectures, such as ARM, ARM64 etc.
=== Debugging ===
-If you switch on CONFIG_IRQ_DOMAIN_DEBUG (which depends on
-CONFIG_IRQ_DOMAIN and CONFIG_DEBUG_FS), you will find a new file in
-your debugfs mount point, called irq_domain_mapping. This file
-contains a live snapshot of all the IRQ domains in the system:
-
- name mapped linear-max direct-max devtree-node
- pl061 8 8 0 /smb/gpio@e0080000
- pl061 8 8 0 /smb/gpio@e1050000
- pMSI 0 0 0 /interrupt-controller@e1101000/v2m@e0080000
- MSI 37 0 0 /interrupt-controller@e1101000/v2m@e0080000
- GICv2m 37 0 0 /interrupt-controller@e1101000/v2m@e0080000
- GICv2 448 448 0 /interrupt-controller@e1101000
-
-it also iterates over the interrupts to display their mapping in the
-domains, and makes the domain stacking visible:
-
-
-irq hwirq chip name chip data active type domain
- 1 0x00019 GICv2 0xffff00000916bfd8 * LINEAR GICv2
- 2 0x0001d GICv2 0xffff00000916bfd8 LINEAR GICv2
- 3 0x0001e GICv2 0xffff00000916bfd8 * LINEAR GICv2
- 4 0x0001b GICv2 0xffff00000916bfd8 * LINEAR GICv2
- 5 0x0001a GICv2 0xffff00000916bfd8 LINEAR GICv2
-[...]
- 96 0x81808 MSI 0x (null) RADIX MSI
- 96+ 0x00063 GICv2m 0xffff8003ee116980 RADIX GICv2m
- 96+ 0x00063 GICv2 0xffff00000916bfd8 LINEAR GICv2
- 97 0x08800 MSI 0x (null) * RADIX MSI
- 97+ 0x00064 GICv2m 0xffff8003ee116980 * RADIX GICv2m
- 97+ 0x00064 GICv2 0xffff00000916bfd8 * LINEAR GICv2
-
-Here, interrupts 1-5 are only using a single domain, while 96 and 97
-are build out of a stack of three domain, each level performing a
-particular function.
+Most of the internals of the IRQ subsystem are exposed in debugfs by
+turning CONFIG_GENERIC_IRQ_DEBUGFS on.
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index 38d6d800761f..6c06e10bd04b 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1097,7 +1097,8 @@ will cause the CPU to disregard the values of its counters on
its next exit from idle.
Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect
cases where a given operation has resulted in a quiescent state
-for all flavors of RCU, for example, <tt>cond_resched_rcu_qs()</tt>.
+for all flavors of RCU, for example, <tt>cond_resched()</tt>
+when RCU has indicated a need for quiescent states.
<h5>RCU Callback Handling</h5>
@@ -1182,8 +1183,8 @@ CPU (and from tracing) unless otherwise stated.
Its fields are as follows:
<pre>
- 1 int dynticks_nesting;
- 2 int dynticks_nmi_nesting;
+ 1 long dynticks_nesting;
+ 2 long dynticks_nmi_nesting;
3 atomic_t dynticks;
4 bool rcu_need_heavy_qs;
5 unsigned long rcu_qs_ctr;
@@ -1191,15 +1192,31 @@ Its fields are as follows:
</pre>
<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
-nesting depth of normal interrupts.
-In addition, this counter is incremented when exiting dyntick-idle
-mode and decremented when entering it.
+nesting depth of process execution, so that in normal circumstances
+this counter has value zero or one.
+NMIs, irqs, and tracers are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
+field.
+Because NMIs cannot be masked, changes to this variable have to be
+undertaken carefully using an algorithm provided by Andy Lutomirski.
+The initial transition from idle adds one, and nested transitions
+add two, so that a nesting level of five is represented by a
+<tt>-&gt;dynticks_nmi_nesting</tt> value of nine.
This counter can therefore be thought of as counting the number
of reasons why this CPU cannot be permitted to enter dyntick-idle
-mode, aside from non-maskable interrupts (NMIs).
-NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
-field, except that NMIs that interrupt non-dyntick-idle execution
-are not counted.
+mode, aside from process-level transitions.
+
+<p>However, it turns out that when running in non-idle kernel context,
+the Linux kernel is fully capable of entering interrupt handlers that
+never exit and perhaps also vice versa.
+Therefore, whenever the <tt>-&gt;dynticks_nesting</tt> field is
+incremented up from zero, the <tt>-&gt;dynticks_nmi_nesting</tt> field
+is set to a large positive number, and whenever the
+<tt>-&gt;dynticks_nesting</tt> field is decremented down to zero,
+the the <tt>-&gt;dynticks_nmi_nesting</tt> field is set to zero.
+Assuming that the number of misnested interrupts is not sufficient
+to overflow the counter, this approach corrects the
+<tt>-&gt;dynticks_nmi_nesting</tt> field every time the corresponding
+CPU enters the idle loop from process context.
</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
CPU's transitions to and from dyntick-idle mode, so that this counter
@@ -1231,14 +1248,16 @@ in response.
<tr><th>&nbsp;</th></tr>
<tr><th align="left">Quick Quiz:</th></tr>
<tr><td>
- Why not just count all NMIs?
- Wouldn't that be simpler and less error prone?
+ Why not simply combine the <tt>-&gt;dynticks_nesting</tt>
+ and <tt>-&gt;dynticks_nmi_nesting</tt> counters into a
+ single counter that just counts the number of reasons that
+ the corresponding CPU is non-idle?
</td></tr>
<tr><th align="left">Answer:</th></tr>
<tr><td bgcolor="#ffffff"><font color="ffffff">
- It seems simpler only until you think hard about how to go about
- updating the <tt>rcu_dynticks</tt> structure's
- <tt>-&gt;dynticks</tt> field.
+ Because this would fail in the presence of interrupts whose
+ handlers never return and of handlers that manage to return
+ from a made-up interrupt.
</font></td></tr>
<tr><td>&nbsp;</td></tr>
</table>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 62e847bcdcdd..49690228b1c6 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -581,7 +581,8 @@ This guarantee was only partially premeditated.
DYNIX/ptx used an explicit memory barrier for publication, but had nothing
resembling <tt>rcu_dereference()</tt> for subscription, nor did it
have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt>.
+that was later subsumed into <tt>rcu_dereference()</tt> and later
+still into <tt>READ_ONCE()</tt>.
The need for these operations made itself known quite suddenly at a
late-1990s meeting with the DEC Alpha architects, back in the days when
DEC was still a free-standing company.
@@ -2797,7 +2798,7 @@ RCU must avoid degrading real-time response for CPU-bound threads, whether
executing in usermode (which is one use case for
<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
That said, CPU-bound loops in the kernel must execute
-<tt>cond_resched_rcu_qs()</tt> at least once per few tens of milliseconds
+<tt>cond_resched()</tt> at least once per few tens of milliseconds
in order to avoid receiving an IPI from RCU.
<p>
@@ -3128,7 +3129,7 @@ The solution, in the form of
is to have implicit
read-side critical sections that are delimited by voluntary context
switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched_rcu_qs()</tt>, and
+<tt>cond_resched()</tt>, and
<tt>synchronize_rcu_tasks()</tt>.
In addition, transitions to and from userspace execution also delimit
tasks-RCU read-side critical sections.
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index 1acb26b09b48..ab96227bad42 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -122,11 +122,7 @@ o Be very careful about comparing pointers obtained from
Note that if checks for being within an RCU read-side
critical section are not required and the pointer is never
dereferenced, rcu_access_pointer() should be used in place
- of rcu_dereference(). The rcu_access_pointer() primitive
- does not require an enclosing read-side critical section,
- and also omits the smp_read_barrier_depends() included in
- rcu_dereference(), which in turn should provide a small
- performance gain in some CPUs (e.g., the DEC Alpha).
+ of rcu_dereference().
o The comparison is against a pointer that references memory
that was initialized "a long time ago." The reason
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index a08f928c8557..4259f95c3261 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -23,12 +23,10 @@ o A CPU looping with preemption disabled. This condition can
o A CPU looping with bottom halves disabled. This condition can
result in RCU-sched and RCU-bh stalls.
-o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
- kernel without invoking schedule(). Note that cond_resched()
- does not necessarily prevent RCU CPU stall warnings. Therefore,
- if the looping in the kernel is really expected and desirable
- behavior, you might need to replace some of the cond_resched()
- calls with calls to cond_resched_rcu_qs().
+o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
+ without invoking schedule(). If the looping in the kernel is
+ really expected and desirable behavior, you might need to add
+ some calls to cond_resched().
o Booting Linux using a console connection that is too slow to
keep up with the boot-time console-message rate. For example,
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index df62466da4e0..a27fbfb0efb8 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -600,8 +600,7 @@ don't forget about them when submitting patches making use of RCU!]
#define rcu_dereference(p) \
({ \
- typeof(p) _________p1 = p; \
- smp_read_barrier_depends(); \
+ typeof(p) _________p1 = READ_ONCE(p); \
(_________p1); \
})
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index af7104aaffd9..dc63e5bddf78 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -114,7 +114,6 @@
This facility can be used to prevent such uncontrolled
GPE floodings.
Format: <int>
- Support masking of GPEs numbered from 0x00 to 0x7f.
acpi_no_auto_serialize [HW,ACPI]
Disable auto-serialization of AML methods
@@ -223,7 +222,7 @@
acpi_sleep= [HW,ACPI] Sleep options
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
- old_ordering, nonvs, sci_force_enable }
+ old_ordering, nonvs, sci_force_enable, nobl }
See Documentation/power/video.txt for information on
s3_bios and s3_mode.
s3_beep is for debugging; it makes the PC's speaker beep
@@ -239,6 +238,9 @@
sci_force_enable causes the kernel to set SCI_EN directly
on resume from S1/S3 (which is against the ACPI spec,
but some broken systems don't work without it).
+ nobl causes the internal blacklist of systems known to
+ behave incorrectly in some ways with respect to system
+ suspend and resume to be ignored (use wisely).
acpi_use_timer_override [HW,ACPI]
Use timer override. For some broken Nvidia NF5 boards
@@ -713,9 +715,6 @@
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
- crossrelease_fullstack
- [KNL] Allow to record full stack trace in cross-release
-
cryptomgr.notests
[KNL] Disable crypto self-tests
@@ -2053,9 +2052,6 @@
This tests the locking primitive's ability to
transition abruptly to and from idle.
- locktorture.torture_runnable= [BOOT]
- Start locktorture running at boot time.
-
locktorture.torture_type= [KNL]
Specify the locking implementation to test.
@@ -2542,6 +2538,9 @@
This is useful when you use a panic=... timeout and
need the box quickly up again.
+ These settings can be accessed at runtime via
+ the nmi_watchdog and hardlockup_panic sysctls.
+
netpoll.carrier_timeout=
[NET] Specifies amount of time (in seconds) that
netpoll should wait for a carrier. By default netpoll
@@ -2626,6 +2625,11 @@
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
+ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+ (indirect branch prediction) vulnerability. System may
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -2712,8 +2716,6 @@
steal time is computed, but won't influence scheduler
behaviour
- nopti [X86-64] Disable kernel page table isolation
-
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
@@ -3100,6 +3102,12 @@
pcie_scan_all Scan all possible PCIe devices. Otherwise we
only look for one device below a PCIe downstream
port.
+ big_root_window Try to add a big 64bit memory window to the PCIe
+ root complex on AMD CPUs. Some GFX hardware
+ can resize a BAR to allow access to all VRAM.
+ Adding the window is slightly risky (it may
+ conflict with unreported devices), so this
+ taints the kernel.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
@@ -3288,11 +3296,20 @@
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
- pti= [X86_64]
- Control user/kernel address space isolation:
- on - enable
- off - disable
- auto - default setting
+ pti= [X86_64] Control Page Table Isolation of user and
+ kernel address spaces. Disabling this feature
+ removes hardening, but improves performance of
+ system calls and interrupts.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable to issues that PTI mitigates
+
+ Not specifying this option is equivalent to pti=auto.
+
+ nopti [X86_64]
+ Equivalent to pti=off
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
@@ -3471,9 +3488,6 @@
the same as for rcuperf.nreaders.
N, where N is the number of CPUs
- rcuperf.perf_runnable= [BOOT]
- Start rcuperf running at boot time.
-
rcuperf.perf_type= [KNL]
Specify the RCU implementation to test.
@@ -3607,9 +3621,6 @@
Test RCU's dyntick-idle handling. See also the
rcutorture.shuffle_interval parameter.
- rcutorture.torture_runnable= [BOOT]
- Start rcutorture running at boot time.
-
rcutorture.torture_type= [KNL]
Specify the RCU implementation to test.
@@ -3667,7 +3678,8 @@
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
- cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, mba.
+ cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
+ mba.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
@@ -3943,6 +3955,29 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+ Selecting 'on' will, and 'auto' may, choose a
+ mitigation method at run time according to the
+ CPU, the available microcode, the setting of the
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+ retpoline,generic - google's original retpoline
+ retpoline,amd - AMD-specific minimal thunk
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
diff --git a/Documentation/admin-guide/mono.rst b/Documentation/admin-guide/mono.rst
index cdddc099af64..59e6d59f0ed9 100644
--- a/Documentation/admin-guide/mono.rst
+++ b/Documentation/admin-guide/mono.rst
@@ -9,14 +9,14 @@ This will allow you to execute Mono-based .NET binaries just like any
other program after you have done the following:
1) You MUST FIRST install the Mono CLR support, either by downloading
- a binary package, a source tarball or by installing from CVS. Binary
+ a binary package, a source tarball or by installing from Git. Binary
packages for several distributions can be found at:
- http://go-mono.com/download.html
+ http://www.mono-project.com/download/
Instructions for compiling Mono can be found at:
- http://www.go-mono.com/compiling.html
+ http://www.mono-project.com/docs/compiling-mono/linux/
Once the Mono CLR support has been installed, just check that
``/usr/bin/mono`` (which could be located elsewhere, for example
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index bd9b3faab2c4..a70090b28b07 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,7 +110,9 @@ infrastructure:
x--------------------------------------------------x
| Name | bits | visible |
|--------------------------------------------------|
- | RES0 | [63-48] | n |
+ | RES0 | [63-52] | n |
+ |--------------------------------------------------|
+ | FHM | [51-48] | y |
|--------------------------------------------------|
| DP | [47-44] | y |
|--------------------------------------------------|
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index 89edba12a9e0..57324ee55ecc 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -158,3 +158,7 @@ HWCAP_SHA512
HWCAP_SVE
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
+
+HWCAP_ASIMDFHM
+
+ Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index fc1c884fea10..c1d520de6dfe 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -72,7 +72,7 @@ stable kernels.
| Hisilicon | Hip0{6,7} | #161010701 | N/A |
| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
| | | | |
-| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
+| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
new file mode 100644
index 000000000000..cefef855dea4
--- /dev/null
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -0,0 +1,519 @@
+This document provides information for the BPF subsystem about various
+workflows related to reporting bugs, submitting patches, and queueing
+patches for stable kernels.
+
+For general information about submitting patches, please refer to
+Documentation/process/. This document only describes additional specifics
+related to BPF.
+
+Reporting bugs:
+---------------
+
+Q: How do I report bugs for BPF kernel code?
+
+A: Since all BPF kernel development as well as bpftool and iproute2 BPF
+ loader development happens through the netdev kernel mailing list,
+ please report any found issues around BPF to the following mailing
+ list:
+
+ netdev@vger.kernel.org
+
+ This may also include issues related to XDP, BPF tracing, etc.
+
+ Given netdev has a high volume of traffic, please also add the BPF
+ maintainers to Cc (from kernel MAINTAINERS file):
+
+ Alexei Starovoitov <ast@kernel.org>
+ Daniel Borkmann <daniel@iogearbox.net>
+
+ In case a buggy commit has already been identified, make sure to keep
+ the actual commit authors in Cc as well for the report. They can
+ typically be identified through the kernel's git tree.
+
+ Please do *not* report BPF issues to bugzilla.kernel.org since it
+ is a guarantee that the reported issue will be overlooked.
+
+Submitting patches:
+-------------------
+
+Q: To which mailing list do I need to submit my BPF patches?
+
+A: Please submit your BPF patches to the netdev kernel mailing list:
+
+ netdev@vger.kernel.org
+
+ Historically, BPF came out of networking and has always been maintained
+ by the kernel networking community. Although these days BPF touches
+ many other subsystems as well, the patches are still routed mainly
+ through the networking community.
+
+ In case your patch has changes in various different subsystems (e.g.
+ tracing, security, etc), make sure to Cc the related kernel mailing
+ lists and maintainers from there as well, so they are able to review
+ the changes and provide their Acked-by's to the patches.
+
+Q: Where can I find patches currently under discussion for BPF subsystem?
+
+A: All patches that are Cc'ed to netdev are queued for review under netdev
+ patchwork project:
+
+ http://patchwork.ozlabs.org/project/netdev/list/
+
+ Those patches which target BPF, are assigned to a 'bpf' delegate for
+ further processing from BPF maintainers. The current queue with
+ patches under review can be found at:
+
+ https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+
+ Once the patches have been reviewed by the BPF community as a whole
+ and approved by the BPF maintainers, their status in patchwork will be
+ changed to 'Accepted' and the submitter will be notified by mail. This
+ means that the patches look good from a BPF perspective and have been
+ applied to one of the two BPF kernel trees.
+
+ In case feedback from the community requires a respin of the patches,
+ their status in patchwork will be set to 'Changes Requested', and purged
+ from the current review queue. Likewise for cases where patches would
+ get rejected or are not applicable to the BPF trees (but assigned to
+ the 'bpf' delegate).
+
+Q: How do the changes make their way into Linux?
+
+A: There are two BPF kernel trees (git repositories). Once patches have
+ been accepted by the BPF maintainers, they will be applied to one
+ of the two BPF trees:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/
+ https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/
+
+ The bpf tree itself is for fixes only, whereas bpf-next for features,
+ cleanups or other kind of improvements ("next-like" content). This is
+ analogous to net and net-next trees for networking. Both bpf and
+ bpf-next will only have a master branch in order to simplify against
+ which branch patches should get rebased to.
+
+ Accumulated BPF patches in the bpf tree will regularly get pulled
+ into the net kernel tree. Likewise, accumulated BPF patches accepted
+ into the bpf-next tree will make their way into net-next tree. net and
+ net-next are both run by David S. Miller. From there, they will go
+ into the kernel mainline tree run by Linus Torvalds. To read up on the
+ process of net and net-next being merged into the mainline tree, see
+ the netdev FAQ under:
+
+ Documentation/networking/netdev-FAQ.txt
+
+ Occasionally, to prevent merge conflicts, we might send pull requests
+ to other trees (e.g. tracing) with a small subset of the patches, but
+ net and net-next are always the main trees targeted for integration.
+
+ The pull requests will contain a high-level summary of the accumulated
+ patches and can be searched on netdev kernel mailing list through the
+ following subject lines (yyyy-mm-dd is the date of the pull request):
+
+ pull-request: bpf yyyy-mm-dd
+ pull-request: bpf-next yyyy-mm-dd
+
+Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be
+ applied to?
+
+A: The process is the very same as described in the netdev FAQ, so
+ please read up on it. The subject line must indicate whether the
+ patch is a fix or rather "next-like" content in order to let the
+ maintainers know whether it is targeted at bpf or bpf-next.
+
+ For fixes eventually landing in bpf -> net tree, the subject must
+ look like:
+
+ git format-patch --subject-prefix='PATCH bpf' start..finish
+
+ For features/improvements/etc that should eventually land in
+ bpf-next -> net-next, the subject must look like:
+
+ git format-patch --subject-prefix='PATCH bpf-next' start..finish
+
+ If unsure whether the patch or patch series should go into bpf
+ or net directly, or bpf-next or net-next directly, it is not a
+ problem either if the subject line says net or net-next as target.
+ It is eventually up to the maintainers to do the delegation of
+ the patches.
+
+ If it is clear that patches should go into bpf or bpf-next tree,
+ please make sure to rebase the patches against those trees in
+ order to reduce potential conflicts.
+
+ In case the patch or patch series has to be reworked and sent out
+ again in a second or later revision, it is also required to add a
+ version number (v2, v3, ...) into the subject prefix:
+
+ git format-patch --subject-prefix='PATCH net-next v2' start..finish
+
+ When changes have been requested to the patch series, always send the
+ whole patch series again with the feedback incorporated (never send
+ individual diffs on top of the old series).
+
+Q: What does it mean when a patch gets applied to bpf or bpf-next tree?
+
+A: It means that the patch looks good for mainline inclusion from
+ a BPF point of view.
+
+ Be aware that this is not a final verdict that the patch will
+ automatically get accepted into net or net-next trees eventually:
+
+ On the netdev kernel mailing list reviews can come in at any point
+ in time. If discussions around a patch conclude that they cannot
+ get included as-is, we will either apply a follow-up fix or drop
+ them from the trees entirely. Therefore, we also reserve to rebase
+ the trees when deemed necessary. After all, the purpose of the tree
+ is to i) accumulate and stage BPF patches for integration into trees
+ like net and net-next, and ii) run extensive BPF test suite and
+ workloads on the patches before they make their way any further.
+
+ Once the BPF pull request was accepted by David S. Miller, then
+ the patches end up in net or net-next tree, respectively, and
+ make their way from there further into mainline. Again, see the
+ netdev FAQ for additional information e.g. on how often they are
+ merged to mainline.
+
+Q: How long do I need to wait for feedback on my BPF patches?
+
+A: We try to keep the latency low. The usual time to feedback will
+ be around 2 or 3 business days. It may vary depending on the
+ complexity of changes and current patch load.
+
+Q: How often do you send pull requests to major kernel trees like
+ net or net-next?
+
+A: Pull requests will be sent out rather often in order to not
+ accumulate too many patches in bpf or bpf-next.
+
+ As a rule of thumb, expect pull requests for each tree regularly
+ at the end of the week. In some cases pull requests could additionally
+ come also in the middle of the week depending on the current patch
+ load or urgency.
+
+Q: Are patches applied to bpf-next when the merge window is open?
+
+A: For the time when the merge window is open, bpf-next will not be
+ processed. This is roughly analogous to net-next patch processing,
+ so feel free to read up on the netdev FAQ about further details.
+
+ During those two weeks of merge window, we might ask you to resend
+ your patch series once bpf-next is open again. Once Linus released
+ a v*-rc1 after the merge window, we continue processing of bpf-next.
+
+ For non-subscribers to kernel mailing lists, there is also a status
+ page run by David S. Miller on net-next that provides guidance:
+
+ http://vger.kernel.org/~davem/net-next.html
+
+Q: I made a BPF verifier change, do I need to add test cases for
+ BPF kernel selftests?
+
+A: If the patch has changes to the behavior of the verifier, then yes,
+ it is absolutely necessary to add test cases to the BPF kernel
+ selftests suite. If they are not present and we think they are
+ needed, then we might ask for them before accepting any changes.
+
+ In particular, test_verifier.c is tracking a high number of BPF test
+ cases, including a lot of corner cases that LLVM BPF back end may
+ generate out of the restricted C code. Thus, adding test cases is
+ absolutely crucial to make sure future changes do not accidentally
+ affect prior use-cases. Thus, treat those test cases as: verifier
+ behavior that is not tracked in test_verifier.c could potentially
+ be subject to change.
+
+Q: When should I add code to samples/bpf/ and when to BPF kernel
+ selftests?
+
+A: In general, we prefer additions to BPF kernel selftests rather than
+ samples/bpf/. The rationale is very simple: kernel selftests are
+ regularly run by various bots to test for kernel regressions.
+
+ The more test cases we add to BPF selftests, the better the coverage
+ and the less likely it is that those could accidentally break. It is
+ not that BPF kernel selftests cannot demo how a specific feature can
+ be used.
+
+ That said, samples/bpf/ may be a good place for people to get started,
+ so it might be advisable that simple demos of features could go into
+ samples/bpf/, but advanced functional and corner-case testing rather
+ into kernel selftests.
+
+ If your sample looks like a test case, then go for BPF kernel selftests
+ instead!
+
+Q: When should I add code to the bpftool?
+
+A: The main purpose of bpftool (under tools/bpf/bpftool/) is to provide
+ a central user space tool for debugging and introspection of BPF programs
+ and maps that are active in the kernel. If UAPI changes related to BPF
+ enable for dumping additional information of programs or maps, then
+ bpftool should be extended as well to support dumping them.
+
+Q: When should I add code to iproute2's BPF loader?
+
+A: For UAPI changes related to the XDP or tc layer (e.g. cls_bpf), the
+ convention is that those control-path related changes are added to
+ iproute2's BPF loader as well from user space side. This is not only
+ useful to have UAPI changes properly designed to be usable, but also
+ to make those changes available to a wider user base of major
+ downstream distributions.
+
+Q: Do you accept patches as well for iproute2's BPF loader?
+
+A: Patches for the iproute2's BPF loader have to be sent to:
+
+ netdev@vger.kernel.org
+
+ While those patches are not processed by the BPF kernel maintainers,
+ please keep them in Cc as well, so they can be reviewed.
+
+ The official git repository for iproute2 is run by Stephen Hemminger
+ and can be found at:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git/
+
+ The patches need to have a subject prefix of '[PATCH iproute2 master]'
+ or '[PATCH iproute2 net-next]'. 'master' or 'net-next' describes the
+ target branch where the patch should be applied to. Meaning, if kernel
+ changes went into the net-next kernel tree, then the related iproute2
+ changes need to go into the iproute2 net-next branch, otherwise they
+ can be targeted at master branch. The iproute2 net-next branch will get
+ merged into the master branch after the current iproute2 version from
+ master has been released.
+
+ Like BPF, the patches end up in patchwork under the netdev project and
+ are delegated to 'shemminger' for further processing:
+
+ http://patchwork.ozlabs.org/project/netdev/list/?delegate=389
+
+Q: What is the minimum requirement before I submit my BPF patches?
+
+A: When submitting patches, always take the time and properly test your
+ patches *prior* to submission. Never rush them! If maintainers find
+ that your patches have not been properly tested, it is a good way to
+ get them grumpy. Testing patch submissions is a hard requirement!
+
+ Note, fixes that go to bpf tree *must* have a Fixes: tag included. The
+ same applies to fixes that target bpf-next, where the affected commit
+ is in net-next (or in some cases bpf-next). The Fixes: tag is crucial
+ in order to identify follow-up commits and tremendously helps for people
+ having to do backporting, so it is a must have!
+
+ We also don't accept patches with an empty commit message. Take your
+ time and properly write up a high quality commit message, it is
+ essential!
+
+ Think about it this way: other developers looking at your code a month
+ from now need to understand *why* a certain change has been done that
+ way, and whether there have been flaws in the analysis or assumptions
+ that the original author did. Thus providing a proper rationale and
+ describing the use-case for the changes is a must.
+
+ Patch submissions with >1 patch must have a cover letter which includes
+ a high level description of the series. This high level summary will
+ then be placed into the merge commit by the BPF maintainers such that
+ it is also accessible from the git log for future reference.
+
+Q: What do I need to consider when adding a new instruction or feature
+ that would require BPF JIT and/or LLVM integration as well?
+
+A: We try hard to keep all BPF JITs up to date such that the same user
+ experience can be guaranteed when running BPF programs on different
+ architectures without having the program punt to the less efficient
+ interpreter in case the in-kernel BPF JIT is enabled.
+
+ If you are unable to implement or test the required JIT changes for
+ certain architectures, please work together with the related BPF JIT
+ developers in order to get the feature implemented in a timely manner.
+ Please refer to the git log (arch/*/net/) to locate the necessary
+ people for helping out.
+
+ Also always make sure to add BPF test cases (e.g. test_bpf.c and
+ test_verifier.c) for new instructions, so that they can receive
+ broad test coverage and help run-time testing the various BPF JITs.
+
+ In case of new BPF instructions, once the changes have been accepted
+ into the Linux kernel, please implement support into LLVM's BPF back
+ end. See LLVM section below for further information.
+
+Stable submission:
+------------------
+
+Q: I need a specific BPF commit in stable kernels. What should I do?
+
+A: In case you need a specific fix in stable kernels, first check whether
+ the commit has already been applied in the related linux-*.y branches:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/
+
+ If not the case, then drop an email to the BPF maintainers with the
+ netdev kernel mailing list in Cc and ask for the fix to be queued up:
+
+ netdev@vger.kernel.org
+
+ The process in general is the same as on netdev itself, see also the
+ netdev FAQ document.
+
+Q: Do you also backport to kernels not currently maintained as stable?
+
+A: No. If you need a specific BPF commit in kernels that are currently not
+ maintained by the stable maintainers, then you are on your own.
+
+ The current stable and longterm stable kernels are all listed here:
+
+ https://www.kernel.org/
+
+Q: The BPF patch I am about to submit needs to go to stable as well. What
+ should I do?
+
+A: The same rules apply as with netdev patch submissions in general, see
+ netdev FAQ under:
+
+ Documentation/networking/netdev-FAQ.txt
+
+ Never add "Cc: stable@vger.kernel.org" to the patch description, but
+ ask the BPF maintainers to queue the patches instead. This can be done
+ with a note, for example, under the "---" part of the patch which does
+ not go into the git log. Alternatively, this can be done as a simple
+ request by mail instead.
+
+Q: Where do I find currently queued BPF patches that will be submitted
+ to stable?
+
+A: Once patches that fix critical bugs got applied into the bpf tree, they
+ are queued up for stable submission under:
+
+ http://patchwork.ozlabs.org/bundle/bpf/stable/?state=*
+
+ They will be on hold there at minimum until the related commit made its
+ way into the mainline kernel tree.
+
+ After having been under broader exposure, the queued patches will be
+ submitted by the BPF maintainers to the stable maintainers.
+
+Testing patches:
+----------------
+
+Q: Which BPF kernel selftests version should I run my kernel against?
+
+A: If you run a kernel xyz, then always run the BPF kernel selftests from
+ that kernel xyz as well. Do not expect that the BPF selftest from the
+ latest mainline tree will pass all the time.
+
+ In particular, test_bpf.c and test_verifier.c have a large number of
+ test cases and are constantly updated with new BPF test sequences, or
+ existing ones are adapted to verifier changes e.g. due to verifier
+ becoming smarter and being able to better track certain things.
+
+LLVM:
+-----
+
+Q: Where do I find LLVM with BPF support?
+
+A: The BPF back end for LLVM is upstream in LLVM since version 3.7.1.
+
+ All major distributions these days ship LLVM with BPF back end enabled,
+ so for the majority of use-cases it is not required to compile LLVM by
+ hand anymore, just install the distribution provided package.
+
+ LLVM's static compiler lists the supported targets through 'llc --version',
+ make sure BPF targets are listed. Example:
+
+ $ llc --version
+ LLVM (http://llvm.org/):
+ LLVM version 6.0.0svn
+ Optimized build.
+ Default target: x86_64-unknown-linux-gnu
+ Host CPU: skylake
+
+ Registered Targets:
+ bpf - BPF (host endian)
+ bpfeb - BPF (big endian)
+ bpfel - BPF (little endian)
+ x86 - 32-bit X86: Pentium-Pro and above
+ x86-64 - 64-bit X86: EM64T and AMD64
+
+ For developers in order to utilize the latest features added to LLVM's
+ BPF back end, it is advisable to run the latest LLVM releases. Support
+ for new BPF kernel features such as additions to the BPF instruction
+ set are often developed together.
+
+ All LLVM releases can be found at: http://releases.llvm.org/
+
+Q: Got it, so how do I build LLVM manually anyway?
+
+A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have
+ that set up, proceed with building the latest LLVM and clang version
+ from the git repositories:
+
+ $ git clone http://llvm.org/git/llvm.git
+ $ cd llvm/tools
+ $ git clone --depth 1 http://llvm.org/git/clang.git
+ $ cd ..; mkdir build; cd build
+ $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+ -DBUILD_SHARED_LIBS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_BUILD_RUNTIME=OFF
+ $ make -j $(getconf _NPROCESSORS_ONLN)
+
+ The built binaries can then be found in the build/bin/ directory, where
+ you can point the PATH variable to.
+
+Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code
+ generation back end or about LLVM generated code that the verifier
+ refuses to accept?
+
+A: Yes, please do! LLVM's BPF back end is a key piece of the whole BPF
+ infrastructure and it ties deeply into verification of programs from the
+ kernel side. Therefore, any issues on either side need to be investigated
+ and fixed whenever necessary.
+
+ Therefore, please make sure to bring them up at netdev kernel mailing
+ list and Cc BPF maintainers for LLVM and kernel bits:
+
+ Yonghong Song <yhs@fb.com>
+ Alexei Starovoitov <ast@kernel.org>
+ Daniel Borkmann <daniel@iogearbox.net>
+
+ LLVM also has an issue tracker where BPF related bugs can be found:
+
+ https://bugs.llvm.org/buglist.cgi?quicksearch=bpf
+
+ However, it is better to reach out through mailing lists with having
+ maintainers in Cc.
+
+Q: I have added a new BPF instruction to the kernel, how can I integrate
+ it into LLVM?
+
+A: LLVM has a -mcpu selector for the BPF back end in order to allow the
+ selection of BPF instruction set extensions. By default the 'generic'
+ processor target is used, which is the base instruction set (v1) of BPF.
+
+ LLVM has an option to select -mcpu=probe where it will probe the host
+ kernel for supported BPF instruction set extensions and selects the
+ optimal set automatically.
+
+ For cross-compilation, a specific version can be select manually as well.
+
+ $ llc -march bpf -mcpu=help
+ Available CPUs for this target:
+
+ generic - Select the generic processor.
+ probe - Select the probe processor.
+ v1 - Select the v1 processor.
+ v2 - Select the v2 processor.
+ [...]
+
+ Newly added BPF instructions to the Linux kernel need to follow the same
+ scheme, bump the instruction set version and implement probing for the
+ extensions such that -mcpu=probe users can benefit from the optimization
+ transparently when upgrading their kernels.
+
+ If you are unable to implement support for the newly added BPF instruction
+ please reach out to BPF developers for help.
+
+ By the way, the BPF kernel selftests run with -mcpu=probe for better
+ test coverage.
+
+Happy BPF hacking!
diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt
index 308e5ff7207a..059f7063eea6 100644
--- a/Documentation/cgroup-v1/cgroups.txt
+++ b/Documentation/cgroup-v1/cgroups.txt
@@ -523,12 +523,7 @@ Accessing a task's cgroup pointer may be done in the following ways:
Each subsystem should:
- add an entry in linux/cgroup_subsys.h
-- define a cgroup_subsys object called <name>_subsys
-
-If a subsystem can be compiled as a module, it should also have in its
-module initcall a call to cgroup_load_subsys(), and in its exitcall a
-call to cgroup_unload_subsys(). It should also set its_subsys.module =
-THIS_MODULE in its .c file.
+- define a cgroup_subsys object called <name>_cgrp_subsys
Each subsystem may export the following methods. The only mandatory
methods are css_alloc/free. Any others that are null are presumed to
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index cefb63639070..a4af2e124e24 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -524,9 +524,9 @@ Note:
Only anonymous and swap cache memory is listed as part of 'rss' stat.
This should not be confused with the true 'resident set size' or the
amount of physical memory used by the cgroup.
- 'rss + file_mapped" will give you resident set size of cgroup.
+ 'rss + mapped_file" will give you resident set size of cgroup.
(Note: file and shmem may be shared among other cgroups. In that case,
- file_mapped is accounted only when the memory cgroup is owner of page
+ mapped_file is accounted only when the memory cgroup is owner of page
cache.)
5.3 swappiness
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 2cddab7efb20..74cdeaed9f7a 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -53,10 +53,14 @@ v1 is available under Documentation/cgroup-v1/.
5-3-2. Writeback
5-4. PID
5-4-1. PID Interface Files
- 5-5. RDMA
- 5-5-1. RDMA Interface Files
- 5-6. Misc
- 5-6-1. perf_event
+ 5-5. Device
+ 5-6. RDMA
+ 5-6-1. RDMA Interface Files
+ 5-7. Misc
+ 5-7-1. perf_event
+ 5-N. Non-normative information
+ 5-N-1. CPU controller root cgroup process behaviour
+ 5-N-2. IO controller root cgroup process behaviour
6. Namespace
6-1. Basics
6-2. The Root and Views
@@ -279,7 +283,7 @@ thread mode, the following conditions must be met.
exempt from this requirement.
Topology-wise, a cgroup can be in an invalid state. Please consider
-the following toplogy::
+the following topology::
A (threaded domain) - B (threaded) - C (domain, just created)
@@ -420,7 +424,9 @@ The root cgroup is exempt from this restriction. Root contains
processes and anonymous resource consumption which can't be associated
with any other cgroups and requires special treatment from most
controllers. How resource consumption in the root cgroup is governed
-is up to each controller.
+is up to each controller (for more information on this topic please
+refer to the Non-normative information section in the Controllers
+chapter).
Note that the restriction doesn't get in the way if there is no
enabled controller in the cgroup's "cgroup.subtree_control". This is
@@ -1063,10 +1069,10 @@ PAGE_SIZE multiple when read back.
reached the limit and allocation was about to fail.
Depending on context result could be invocation of OOM
- killer and retrying allocation or failing alloction.
+ killer and retrying allocation or failing allocation.
Failed allocation in its turn could be returned into
- userspace as -ENOMEM or siletly ignored in cases like
+ userspace as -ENOMEM or silently ignored in cases like
disk readahead. For now OOM in memory cgroup kills
tasks iff shortage has happened inside page fault.
@@ -1191,7 +1197,7 @@ PAGE_SIZE multiple when read back.
cgroups. The default is "max".
Swap usage hard limit. If a cgroup's swap usage reaches this
- limit, anonymous meomry of the cgroup will not be swapped out.
+ limit, anonymous memory of the cgroup will not be swapped out.
Usage Guidelines
@@ -1429,6 +1435,30 @@ through fork() or clone(). These will return -EAGAIN if the creation
of a new process would cause a cgroup policy to be violated.
+Device controller
+-----------------
+
+Device controller manages access to device files. It includes both
+creation of new device files (using mknod), and access to the
+existing device files.
+
+Cgroup v2 device controller has no interface files and is implemented
+on top of cgroup BPF. To control access to device files, a user may
+create bpf programs of the BPF_CGROUP_DEVICE type and attach them
+to cgroups. On an attempt to access a device file, corresponding
+BPF programs will be executed, and depending on the return value
+the attempt will succeed or fail with -EPERM.
+
+A BPF_CGROUP_DEVICE program takes a pointer to the bpf_cgroup_dev_ctx
+structure, which describes the device access attempt: access type
+(mknod/read/write) and device (type, major and minor numbers).
+If the program returns 0, the attempt fails with -EPERM, otherwise
+it succeeds.
+
+An example of BPF_CGROUP_DEVICE program may be found in the kernel
+source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
+
+
RDMA
----
@@ -1481,6 +1511,35 @@ always be filtered by cgroup v2 path. The controller can still be
moved to a legacy hierarchy after v2 hierarchy is populated.
+Non-normative information
+-------------------------
+
+This section contains information that isn't considered to be a part of
+the stable kernel API and so is subject to change.
+
+
+CPU controller root cgroup process behaviour
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When distributing CPU cycles in the root cgroup each thread in this
+cgroup is treated as if it was hosted in a separate child cgroup of the
+root cgroup. This child cgroup weight is dependent on its thread nice
+level.
+
+For details of this mapping see sched_prio_to_weight array in
+kernel/sched/core.c file (values from this array should be scaled
+appropriately so the neutral - nice 0 - value is 100 instead of 1024).
+
+
+IO controller root cgroup process behaviour
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Root cgroup processes are hosted in an implicit leaf child node.
+When distributing IO resources this implicit child node is taken into
+account as if it was a normal child cgroup of the root cgroup with a
+weight value of 200.
+
+
Namespace
=========
diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt
index d4628174b7c5..53e51caa3347 100644
--- a/Documentation/circular-buffers.txt
+++ b/Documentation/circular-buffers.txt
@@ -220,8 +220,7 @@ before it writes the new tail pointer, which will erase the item.
Note the use of READ_ONCE() and smp_load_acquire() to read the
opposition index. This prevents the compiler from discarding and
-reloading its cached value - which some compilers will do across
-smp_read_barrier_depends(). This isn't strictly needed if you can
+reloading its cached value. This isn't strictly needed if you can
be sure that the opposition index will _only_ be used the once.
The smp_load_acquire() additionally forces the CPU to order against
subsequent memory references. Similarly, smp_store_release() is used
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 63857d33778c..62ac5a9f3a9f 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -88,7 +88,6 @@ finally:
if makefile_version and makefile_patchlevel:
version = release = makefile_version + '.' + makefile_patchlevel
else:
- sys.stderr.write('Warning: Could not extract kernel version\n')
version = release = "unknown version"
# The language for content autogenerated by Sphinx. Refer to documentation
diff --git a/Documentation/errseq.rst b/Documentation/core-api/errseq.rst
index 4c29bd5afbc5..ff332e272405 100644
--- a/Documentation/errseq.rst
+++ b/Documentation/core-api/errseq.rst
@@ -1,5 +1,7 @@
+=====================
The errseq_t datatype
=====================
+
An errseq_t is a way of recording errors in one place, and allowing any
number of "subscribers" to tell whether it has changed since a previous
point where it was sampled.
@@ -21,12 +23,13 @@ a flag to tell whether the value has been sampled since a new value was
recorded. That allows us to avoid bumping the counter if no one has
sampled it since the last time an error was recorded.
-Thus we end up with a value that looks something like this::
+Thus we end up with a value that looks something like this:
- bit: 31..13 12 11..0
- +-----------------+----+----------------+
- | counter | SF | errno |
- +-----------------+----+----------------+
++--------------------------------------+----+------------------------+
+| 31..13 | 12 | 11..0 |
++--------------------------------------+----+------------------------+
+| counter | SF | errno |
++--------------------------------------+----+------------------------+
The general idea is for "watchers" to sample an errseq_t value and keep
it as a running cursor. That value can later be used to tell whether
@@ -42,6 +45,7 @@ has ever been an error set since it was first initialized.
API usage
=========
+
Let me tell you a story about a worker drone. Now, he's a good worker
overall, but the company is a little...management heavy. He has to
report to 77 supervisors today, and tomorrow the "big boss" is coming in
@@ -125,6 +129,7 @@ not usable by anyone else.
Serializing errseq_t cursor updates
===================================
+
Note that the errseq_t API does not protect the errseq_t cursor during a
check_and_advance_operation. Only the canonical error code is handled
atomically. In a situation where more than one task might be using the
@@ -147,3 +152,8 @@ errseq_check_and_advance after taking the lock. e.g.::
That avoids the spinlock in the common case where nothing has changed
since the last time it was checked.
+
+Functions
+=========
+
+.. kernel-doc:: lib/errseq.c
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index d5bbe035316d..1b1fd01990b5 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -14,6 +14,7 @@ Core utilities
kernel-api
assoc_array
atomic_ops
+ refcount-vs-atomic
cpu_hotplug
local_ops
workqueue
@@ -21,6 +22,8 @@ Core utilities
flexible-arrays
librs
genalloc
+ errseq
+ printk-formats
Interfaces for kernel debugging
===============================
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 2d9da6c40a4d..e7fadf02c511 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -139,6 +139,21 @@ Division Functions
.. kernel-doc:: lib/gcd.c
:export:
+Sorting
+-------
+
+.. kernel-doc:: lib/sort.c
+ :export:
+
+.. kernel-doc:: lib/list_sort.c
+ :export:
+
+UUID/GUID
+---------
+
+.. kernel-doc:: lib/uuid.c
+ :export:
+
Memory Management in Linux
==========================
diff --git a/Documentation/printk-formats.txt b/Documentation/core-api/printk-formats.rst
index aa0a776c817a..258b46435320 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/core-api/printk-formats.rst
@@ -5,6 +5,7 @@ How to get printk format specifiers right
:Author: Randy Dunlap <rdunlap@infradead.org>
:Author: Andrew Murray <amurray@mpc-data.co.uk>
+
Integer types
=============
@@ -25,39 +26,45 @@ Integer types
s64 %lld or %llx
u64 %llu or %llx
-If <type> is dependent on a config option for its size (e.g., ``sector_t``,
-``blkcnt_t``) or is architecture-dependent for its size (e.g., ``tcflag_t``),
-use a format specifier of its largest possible type and explicitly cast to it.
+
+If <type> is dependent on a config option for its size (e.g., sector_t,
+blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
+format specifier of its largest possible type and explicitly cast to it.
Example::
printk("test: sector number/total blocks: %llu/%llu\n",
(unsigned long long)sector, (unsigned long long)blockcount);
-Reminder: ``sizeof()`` result is of type ``size_t``.
+Reminder: sizeof() returns type size_t.
-The kernel's printf does not support ``%n``. For obvious reasons, floating
-point formats (``%e, %f, %g, %a``) are also not recognized. Use of any
+The kernel's printf does not support %n. Floating point formats (%e, %f,
+%g, %a) are also not recognized, for obvious reasons. Use of any
unsupported specifier or length qualifier results in a WARN and early
-return from vsnprintf.
-
-Raw pointer value SHOULD be printed with %p. The kernel supports
-the following extended format specifiers for pointer types:
+return from vsnprintf().
-Pointer Types
+Pointer types
=============
-Pointers printed without a specifier extension (i.e unadorned %p) are
-hashed to give a unique identifier without leaking kernel addresses to user
-space. On 64 bit machines the first 32 bits are zeroed. If you _really_
-want the address see %px below.
+A raw pointer value may be printed with %p which will hash the address
+before printing. The kernel also supports extended specifiers for printing
+pointers of different types.
+
+Plain Pointers
+--------------
::
%p abcdef12 or 00000000abcdef12
+Pointers printed without a specifier extension (i.e unadorned %p) are
+hashed to prevent leaking information about the kernel memory layout. This
+has the added benefit of providing a unique identifier. On 64-bit machines
+the first 32 bits are zeroed. If you *really* want the address see %px
+below.
+
Symbols/Function Pointers
-=========================
+-------------------------
::
@@ -69,6 +76,7 @@ Symbols/Function Pointers
%ps versatile_init
%pB prev_fn_of_versatile_init+0x88/0x88
+
The ``F`` and ``f`` specifiers are for printing function pointers,
for example, f->func, &gettimeofday. They have the same result as
``S`` and ``s`` specifiers. But they do an extra conversion on
@@ -77,14 +85,14 @@ are actually function descriptors.
The ``S`` and ``s`` specifiers can be used for printing symbols
from direct addresses, for example, __builtin_return_address(0),
-(void *)regs->ip. They result in the symbol name with (``S``) or
-without (``s``) offsets. If KALLSYMS are disabled then the symbol
+(void *)regs->ip. They result in the symbol name with (S) or
+without (s) offsets. If KALLSYMS are disabled then the symbol
address is printed instead.
The ``B`` specifier results in the symbol name with offsets and should be
used when printing stack backtraces. The specifier takes into
consideration the effect of compiler optimisations which may occur
-when tail-call``s are used and marked with the noreturn GCC attribute.
+when tail-calls are used and marked with the noreturn GCC attribute.
Examples::
@@ -97,33 +105,32 @@ Examples::
printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack);
Kernel Pointers
-===============
+---------------
::
%pK 01234567 or 0123456789abcdef
For printing kernel pointers which should be hidden from unprivileged
-users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see
+users. The behaviour of %pK depends on the kptr_restrict sysctl - see
Documentation/sysctl/kernel.txt for more details.
Unmodified Addresses
-====================
+--------------------
::
%px 01234567 or 0123456789abcdef
-For printing pointers when you _really_ want to print the address. Please
+For printing pointers when you *really* want to print the address. Please
consider whether or not you are leaking sensitive information about the
-Kernel layout in memory before printing pointers with %px. %px is
-functionally equivalent to %lx. %px is preferred to %lx because it is more
-uniquely grep'able. If, in the future, we need to modify the way the Kernel
-handles printing pointers it will be nice to be able to find the call
-sites.
+kernel memory layout before printing pointers with %px. %px is functionally
+equivalent to %lx (or %lu). %px is preferred because it is more uniquely
+grep'able. If in the future we need to modify the way the kernel handles
+printing pointers we will be better equipped to find the call sites.
Struct Resources
-================
+----------------
::
@@ -133,32 +140,37 @@ Struct Resources
[mem 0x0000000060000000-0x000000006fffffff pref]
For printing struct resources. The ``R`` and ``r`` specifiers result in a
-printed resource with (``R``) or without (``r``) a decoded flags member.
+printed resource with (R) or without (r) a decoded flags member.
+
Passed by reference.
-Physical addresses types ``phys_addr_t``
-========================================
+Physical address types phys_addr_t
+----------------------------------
::
%pa[p] 0x01234567 or 0x0123456789abcdef
-For printing a ``phys_addr_t`` type (and its derivatives, such as
-``resource_size_t``) which can vary based on build options, regardless of
-the width of the CPU data path. Passed by reference.
+For printing a phys_addr_t type (and its derivatives, such as
+resource_size_t) which can vary based on build options, regardless of the
+width of the CPU data path.
+
+Passed by reference.
-DMA addresses types ``dma_addr_t``
-==================================
+DMA address types dma_addr_t
+----------------------------
::
%pad 0x01234567 or 0x0123456789abcdef
-For printing a ``dma_addr_t`` type which can vary based on build options,
-regardless of the width of the CPU data path. Passed by reference.
+For printing a dma_addr_t type which can vary based on build options,
+regardless of the width of the CPU data path.
+
+Passed by reference.
Raw buffer as an escaped string
-===============================
+-------------------------------
::
@@ -168,8 +180,8 @@ For printing raw buffer as an escaped string. For the following buffer::
1b 62 20 5c 43 07 22 90 0d 5d
-few examples show how the conversion would be done (the result string
-without surrounding quotes)::
+A few examples show how the conversion would be done (excluding surrounding
+quotes)::
%*pE "\eb \C\a"\220\r]"
%*pEhp "\x1bb \C\x07"\x90\x0d]"
@@ -179,23 +191,23 @@ The conversion rules are applied according to an optional combination
of flags (see :c:func:`string_escape_mem` kernel documentation for the
details):
- - ``a`` - ESCAPE_ANY
- - ``c`` - ESCAPE_SPECIAL
- - ``h`` - ESCAPE_HEX
- - ``n`` - ESCAPE_NULL
- - ``o`` - ESCAPE_OCTAL
- - ``p`` - ESCAPE_NP
- - ``s`` - ESCAPE_SPACE
+ - a - ESCAPE_ANY
+ - c - ESCAPE_SPECIAL
+ - h - ESCAPE_HEX
+ - n - ESCAPE_NULL
+ - o - ESCAPE_OCTAL
+ - p - ESCAPE_NP
+ - s - ESCAPE_SPACE
By default ESCAPE_ANY_NP is used.
ESCAPE_ANY_NP is the sane choice for many cases, in particularly for
printing SSIDs.
-If field width is omitted the 1 byte only will be escaped.
+If field width is omitted then 1 byte only will be escaped.
Raw buffer as a hex string
-==========================
+--------------------------
::
@@ -204,12 +216,12 @@ Raw buffer as a hex string
%*phD 00-01-02- ... -3f
%*phN 000102 ... 3f
-For printing a small buffers (up to 64 bytes long) as a hex string with
-certain separator. For the larger buffers consider to use
+For printing small buffers (up to 64 bytes long) as a hex string with a
+certain separator. For larger buffers consider using
:c:func:`print_hex_dump`.
MAC/FDDI addresses
-==================
+------------------
::
@@ -220,11 +232,11 @@ MAC/FDDI addresses
%pmR 050403020100
For printing 6-byte MAC/FDDI addresses in hex notation. The ``M`` and ``m``
-specifiers result in a printed address with (``M``) or without (``m``) byte
-separators. The default byte separator is the colon (``:``).
+specifiers result in a printed address with (M) or without (m) byte
+separators. The default byte separator is the colon (:).
Where FDDI addresses are concerned the ``F`` specifier can be used after
-the ``M`` specifier to use dash (``-``) separators instead of the default
+the ``M`` specifier to use dash (-) separators instead of the default
separator.
For Bluetooth addresses the ``R`` specifier shall be used after the ``M``
@@ -234,7 +246,7 @@ of Bluetooth addresses which are in the little endian order.
Passed by reference.
IPv4 addresses
-==============
+--------------
::
@@ -243,8 +255,8 @@ IPv4 addresses
%p[Ii]4[hnbl]
For printing IPv4 dot-separated decimal addresses. The ``I4`` and ``i4``
-specifiers result in a printed address with (``i4``) or without (``I4``)
-leading zeros.
+specifiers result in a printed address with (i4) or without (I4) leading
+zeros.
The additional ``h``, ``n``, ``b``, and ``l`` specifiers are used to specify
host, network, big or little endian order addresses respectively. Where
@@ -253,7 +265,7 @@ no specifier is provided the default network/big endian order is used.
Passed by reference.
IPv6 addresses
-==============
+--------------
::
@@ -262,7 +274,7 @@ IPv6 addresses
%pI6c 1:2:3:4:5:6:7:8
For printing IPv6 network-order 16-bit hex addresses. The ``I6`` and ``i6``
-specifiers result in a printed address with (``I6``) or without (``i6``)
+specifiers result in a printed address with (I6) or without (i6)
colon-separators. Leading zeros are always used.
The additional ``c`` specifier can be used with the ``I`` specifier to
@@ -272,7 +284,7 @@ http://tools.ietf.org/html/rfc5952
Passed by reference.
IPv4/IPv6 addresses (generic, with port, flowinfo, scope)
-=========================================================
+---------------------------------------------------------
::
@@ -282,8 +294,8 @@ IPv4/IPv6 addresses (generic, with port, flowinfo, scope)
%pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345
%p[Ii]S[pfschnbl]
-For printing an IP address without the need to distinguish whether it``s
-of type AF_INET or AF_INET6, a pointer to a valid ``struct sockaddr``,
+For printing an IP address without the need to distinguish whether it's of
+type AF_INET or AF_INET6. A pointer to a valid struct sockaddr,
specified through ``IS`` or ``iS``, can be passed to this format specifier.
The additional ``p``, ``f``, and ``s`` specifiers are used to specify port
@@ -309,7 +321,7 @@ Further examples::
%pISpfc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345/123456789
UUID/GUID addresses
-===================
+-------------------
::
@@ -318,33 +330,33 @@ UUID/GUID addresses
%pUl 03020100-0504-0706-0809-0a0b0c0e0e0f
%pUL 03020100-0504-0706-0809-0A0B0C0E0E0F
-For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
-'b' and 'B' specifiers are used to specify a little endian order in
-lower ('l') or upper case ('L') hex characters - and big endian order
-in lower ('b') or upper case ('B') hex characters.
+For printing 16-byte UUID/GUIDs addresses. The additional ``l``, ``L``,
+``b`` and ``B`` specifiers are used to specify a little endian order in
+lower (l) or upper case (L) hex notation - and big endian order in lower (b)
+or upper case (B) hex notation.
Where no additional specifiers are used the default big endian
-order with lower case hex characters will be printed.
+order with lower case hex notation will be printed.
Passed by reference.
dentry names
-============
+------------
::
%pd{,2,3,4}
%pD{,2,3,4}
-For printing dentry name; if we race with :c:func:`d_move`, the name might be
-a mix of old and new ones, but it won't oops. ``%pd`` dentry is a safer
-equivalent of ``%s`` ``dentry->d_name.name`` we used to use, ``%pd<n>`` prints
-``n`` last components. ``%pD`` does the same thing for struct file.
+For printing dentry name; if we race with :c:func:`d_move`, the name might
+be a mix of old and new ones, but it won't oops. %pd dentry is a safer
+equivalent of %s dentry->d_name.name we used to use, %pd<n> prints ``n``
+last components. %pD does the same thing for struct file.
Passed by reference.
block_device names
-==================
+------------------
::
@@ -353,7 +365,7 @@ block_device names
For printing name of block_device pointers.
struct va_format
-================
+----------------
::
@@ -375,31 +387,27 @@ correctness of the format string and va_list arguments.
Passed by reference.
kobjects
-========
+--------
::
- %pO
+ %pOF[fnpPcCF]
- Base specifier for kobject based structs. Must be followed with
- character for specific type of kobject as listed below:
- Device tree nodes:
+For printing kobject based structs (device nodes). Default behaviour is
+equivalent to %pOFf.
- %pOF[fnpPcCF]
+ - f - device node full_name
+ - n - device node name
+ - p - device node phandle
+ - P - device node path spec (name + @unit)
+ - F - device node flags
+ - c - major compatible string
+ - C - full compatible string
- For printing device tree nodes. The optional arguments are:
- f device node full_name
- n device node name
- p device node phandle
- P device node path spec (name + @unit)
- F device node flags
- c major compatible string
- C full compatible string
- Without any arguments prints full_name (same as %pOFf)
- The separator when using multiple arguments is ':'
+The separator when using multiple arguments is ':'
- Examples:
+Examples::
%pOF /foo/bar@0 - Node full name
%pOFf /foo/bar@0 - Same as above
@@ -412,11 +420,10 @@ kobjects
P - Populated
B - Populated bus
- Passed by reference.
-
+Passed by reference.
struct clk
-==========
+----------
::
@@ -424,14 +431,14 @@ struct clk
%pCn pll1
%pCr 1560000000
-For printing struct clk structures. ``%pC`` and ``%pCn`` print the name
+For printing struct clk structures. %pC and %pCn print the name
(Common Clock Framework) or address (legacy clock framework) of the
-structure; ``%pCr`` prints the current clock rate.
+structure; %pCr prints the current clock rate.
Passed by reference.
bitmap and its derivatives such as cpumask and nodemask
-=======================================================
+-------------------------------------------------------
::
@@ -439,13 +446,13 @@ bitmap and its derivatives such as cpumask and nodemask
%*pbl 0,3-6,8-10
For printing bitmap and its derivatives such as cpumask and nodemask,
-``%*pb`` output the bitmap with field width as the number of bits and ``%*pbl``
+%*pb outputs the bitmap with field width as the number of bits and %*pbl
output the bitmap as range list with field width as the number of bits.
Passed by reference.
Flags bitfields such as page flags, gfp_flags
-=============================================
+---------------------------------------------
::
@@ -459,14 +466,14 @@ character. Currently supported are [p]age flags, [v]ma_flags (both
expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag
names and print order depends on the particular type.
-Note that this format should not be used directly in :c:func:`TP_printk()` part
-of a tracepoint. Instead, use the ``show_*_flags()`` functions from
-<trace/events/mmflags.h>.
+Note that this format should not be used directly in the
+:c:func:`TP_printk()` part of a tracepoint. Instead, use the show_*_flags()
+functions from <trace/events/mmflags.h>.
Passed by reference.
Network device features
-=======================
+-----------------------
::
@@ -476,8 +483,10 @@ For printing netdev_features_t.
Passed by reference.
-If you add other ``%p`` extensions, please extend lib/test_printf.c with
-one or more test cases, if at all feasible.
+Thanks
+======
+If you add other %p extensions, please extend <lib/test_printf.c> with
+one or more test cases, if at all feasible.
Thank you for your cooperation and attention.
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
new file mode 100644
index 000000000000..83351c258cdb
--- /dev/null
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -0,0 +1,150 @@
+===================================
+refcount_t API compared to atomic_t
+===================================
+
+.. contents:: :local:
+
+Introduction
+============
+
+The goal of refcount_t API is to provide a minimal API for implementing
+an object's reference counters. While a generic architecture-independent
+implementation from lib/refcount.c uses atomic operations underneath,
+there are a number of differences between some of the ``refcount_*()`` and
+``atomic_*()`` functions with regards to the memory ordering guarantees.
+This document outlines the differences and provides respective examples
+in order to help maintainers validate their code against the change in
+these memory ordering guarantees.
+
+The terms used through this document try to follow the formal LKMM defined in
+github.com/aparri/memory-model/blob/master/Documentation/explanation.txt
+
+memory-barriers.txt and atomic_t.txt provide more background to the
+memory ordering in general and for atomic operations specifically.
+
+Relevant types of memory ordering
+=================================
+
+.. note:: The following section only covers some of the memory
+ ordering types that are relevant for the atomics and reference
+ counters and used through this document. For a much broader picture
+ please consult memory-barriers.txt document.
+
+In the absence of any memory ordering guarantees (i.e. fully unordered)
+atomics & refcounters only provide atomicity and
+program order (po) relation (on the same CPU). It guarantees that
+each ``atomic_*()`` and ``refcount_*()`` operation is atomic and instructions
+are executed in program order on a single CPU.
+This is implemented using :c:func:`READ_ONCE`/:c:func:`WRITE_ONCE` and
+compare-and-swap primitives.
+
+A strong (full) memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before any po-later instruction is executed on the same CPU.
+It also guarantees that all po-earlier stores on the same CPU
+and all propagated stores from other CPUs must propagate to all
+other CPUs before any po-later instruction is executed on the original
+CPU (A-cumulative property). This is implemented using :c:func:`smp_mb`.
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+A control dependency (on success) for refcounters guarantees that
+if a reference for an object was successfully obtained (reference
+counter increment or addition happened, function returned true),
+then further stores are ordered against this operation.
+Control dependency on stores are not implemented using any explicit
+barriers, but rely on CPU not to speculate on stores. This is only
+a single CPU relation and provides no guarantees for other CPUs.
+
+
+Comparison of functions
+=======================
+
+case 1) - non-"Read/Modify/Write" (RMW) ops
+-------------------------------------------
+
+Function changes:
+
+ * :c:func:`atomic_set` --> :c:func:`refcount_set`
+ * :c:func:`atomic_read` --> :c:func:`refcount_read`
+
+Memory ordering guarantee changes:
+
+ * none (both fully unordered)
+
+
+case 2) - increment-based ops that return no value
+--------------------------------------------------
+
+Function changes:
+
+ * :c:func:`atomic_inc` --> :c:func:`refcount_inc`
+ * :c:func:`atomic_add` --> :c:func:`refcount_add`
+
+Memory ordering guarantee changes:
+
+ * none (both fully unordered)
+
+case 3) - decrement-based RMW ops that return no value
+------------------------------------------------------
+
+Function changes:
+
+ * :c:func:`atomic_dec` --> :c:func:`refcount_dec`
+
+Memory ordering guarantee changes:
+
+ * fully unordered --> RELEASE ordering
+
+
+case 4) - increment-based RMW ops that return a value
+-----------------------------------------------------
+
+Function changes:
+
+ * :c:func:`atomic_inc_not_zero` --> :c:func:`refcount_inc_not_zero`
+ * no atomic counterpart --> :c:func:`refcount_add_not_zero`
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> control dependency on success for stores
+
+.. note:: We really assume here that necessary ordering is provided as a
+ result of obtaining pointer to the object!
+
+
+case 5) - decrement-based RMW ops that return a value
+-----------------------------------------------------
+
+Function changes:
+
+ * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
+ * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
+ * no atomic counterpart --> :c:func:`refcount_dec_if_one`
+ * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> RELEASE ordering + control dependency
+
+.. note:: :c:func:`atomic_add_unless` only provides full order on success.
+
+
+case 6) - lock-based RMW
+------------------------
+
+Function changes:
+
+ * :c:func:`atomic_dec_and_lock` --> :c:func:`refcount_dec_and_lock`
+ * :c:func:`atomic_dec_and_mutex_lock` --> :c:func:`refcount_dec_and_mutex_lock`
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> RELEASE ordering + control dependency + hold
+ :c:func:`spin_lock` on success
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
index d3ca8af21a31..86786d87d9a8 100644
--- a/Documentation/device-mapper/cache-policies.txt
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -60,7 +60,7 @@ Memory usage:
The mq policy used a lot of memory; 88 bytes per cache block on a 64
bit machine.
-smq uses 28bit indexes to implement it's data structures rather than
+smq uses 28bit indexes to implement its data structures rather than
pointers. It avoids storing an explicit hit count for each block. It
has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
the entries (each hotspot block covers a larger area than a single
@@ -84,7 +84,7 @@ resulting in better promotion/demotion decisions.
Adaptability:
The mq policy maintained a hit count for each cache block. For a
-different block to get promoted to the cache it's hit count has to
+different block to get promoted to the cache its hit count has to
exceed the lowest currently in the cache. This meant it could take a
long time for the cache to adapt between varying IO patterns.
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index cdfd0feb294e..ff0841711fd5 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -59,7 +59,7 @@ Fixed block size
The origin is divided up into blocks of a fixed size. This block size
is configurable when you first create the cache. Typically we've been
using block sizes of 256KB - 1024KB. The block size must be between 64
-(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).
+sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
Having a fixed block size simplifies the target a lot. But it is
something of a compromise. For instance, a small part of a block may be
@@ -119,7 +119,7 @@ doing here to avoid migrating during those peak io moments.
For the time being, a message "migration_threshold <#sectors>"
can be used to set the maximum number of sectors being migrated,
-the default being 204800 sectors (or 100MB).
+the default being 2048 sectors (1MB).
Updating on-disk metadata
-------------------------
@@ -143,11 +143,6 @@ the policy how big this chunk is, but it should be kept small. Like the
dirty flags this data is lost if there's a crash so a safe fallback
value should always be possible.
-For instance, the 'mq' policy, which is currently the default policy,
-uses this facility to store the hit count of the cache blocks. If
-there's a crash this information will be lost, which means the cache
-may be less efficient until those hit counts are regenerated.
-
Policy hints affect performance, not correctness.
Policy messaging
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 32df07e29f68..390c145f01d7 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -343,5 +343,8 @@ Version History
1.11.0 Fix table line argument order
(wrong raid10_copies/raid10_format sequence)
1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
-1.12.1 fix for MD deadlock between mddev_suspend() and md_write_start() available
+1.12.1 Fix for MD deadlock between mddev_suspend() and md_write_start() available
1.13.0 Fix dev_health status at end of "recover" (was 'a', now 'A')
+1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an
+ state races.
+1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
index ad6949bff2e3..b8bbb516f989 100644
--- a/Documentation/device-mapper/snapshot.txt
+++ b/Documentation/device-mapper/snapshot.txt
@@ -49,6 +49,10 @@ The difference between persistent and transient is with transient
snapshots less metadata must be saved on disk - they can be kept in
memory by the kernel.
+When loading or unloading the snapshot target, the corresponding
+snapshot-origin or snapshot-merge target must be suspended. A failure to
+suspend the origin target could result in data corruption.
+
* snapshot-merge <origin> <COW device> <persistent> <chunksize>
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 1699a55b7b70..4bcd4b7f79f9 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -112,9 +112,11 @@ $low_water_mark is expressed in blocks of size $data_block_size. If
free space on the data device drops below this level then a dm event
will be triggered which a userspace daemon should catch allowing it to
extend the pool device. Only one such event will be sent.
-Resuming a device with a new table itself triggers an event so the
-userspace daemon can use this to detect a situation where a new table
-already exceeds the threshold.
+
+No special event is triggered if a just resumed device's free space is below
+the low water mark. However, resuming a device always triggers an
+event; a userspace daemon should verify that free space exceeds the low
+water mark when handling this event.
A low water mark for the metadata device is maintained in the kernel and
will trigger a dm event if free space on the metadata device drops below
@@ -274,7 +276,8 @@ ii) Status
<transaction id> <used metadata blocks>/<total metadata blocks>
<used data blocks>/<total data blocks> <held metadata root>
- [no_]discard_passdown ro|rw
+ ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
+ needs_check|-
transaction id:
A 64-bit number used by userspace to help synchronise with metadata
@@ -394,3 +397,6 @@ ii) Status
If the pool has encountered device errors and failed, the status
will just contain the string 'Fail'. The userspace recovery
tools should then be used.
+
+ In the case where <nr mapped sectors> is 0, there is no highest
+ mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/device-mapper/unstriped.txt b/Documentation/device-mapper/unstriped.txt
new file mode 100644
index 000000000000..0b2a306c54ee
--- /dev/null
+++ b/Documentation/device-mapper/unstriped.txt
@@ -0,0 +1,124 @@
+Introduction
+============
+
+The device-mapper "unstriped" target provides a transparent mechanism to
+unstripe a device-mapper "striped" target to access the underlying disks
+without having to touch the true backing block-device. It can also be
+used to unstripe a hardware RAID-0 to access backing disks.
+
+Parameters:
+<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+
+<number of stripes>
+ The number of stripes in the RAID 0.
+
+<chunk size>
+ The amount of 512B sectors in the chunk striping.
+
+<dev_path>
+ The block device you wish to unstripe.
+
+<stripe #>
+ The stripe number within the device that corresponds to physical
+ drive you wish to unstripe. This must be 0 indexed.
+
+
+Why use this module?
+====================
+
+An example of undoing an existing dm-stripe
+-------------------------------------------
+
+This small bash script will setup 4 loop devices and use the existing
+striped target to combine the 4 devices into one. It then will use
+the unstriped target ontop of the striped device to access the
+individual backing loop devices. We write data to the newly exposed
+unstriped devices and verify the data written matches the correct
+underlying device on the striped array.
+
+#!/bin/bash
+
+MEMBER_SIZE=$((128 * 1024 * 1024))
+NUM=4
+SEQ_END=$((${NUM}-1))
+CHUNK=256
+BS=4096
+
+RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
+DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
+COUNT=$((${MEMBER_SIZE} / ${BS}))
+
+for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
+ losetup /dev/loop${i} member-${i}
+ DM_PARMS+=" /dev/loop${i} 0"
+done
+
+echo $DM_PARMS | dmsetup create raid0
+for i in $(seq 0 ${SEQ_END}); do
+ echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
+done;
+
+for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
+ diff /dev/mapper/set-${i} member-${i}
+done;
+
+for i in $(seq 0 ${SEQ_END}); do
+ dmsetup remove set-${i}
+done
+
+dmsetup remove raid0
+
+for i in $(seq 0 ${SEQ_END}); do
+ losetup -d /dev/loop${i}
+ rm -f member-${i}
+done
+
+Another example
+---------------
+
+Intel NVMe drives contain two cores on the physical device.
+Each core of the drive has segregated access to its LBA range.
+The current LBA model has a RAID 0 128k chunk on each core, resulting
+in a 256k stripe across the two cores:
+
+ Core 0: Core 1:
+ __________ __________
+ | LBA 512| | LBA 768|
+ | LBA 0 | | LBA 256|
+ ---------- ----------
+
+The purpose of this unstriping is to provide better QoS in noisy
+neighbor environments. When two partitions are created on the
+aggregate drive without this unstriping, reads on one partition
+can affect writes on another partition. This is because the partitions
+are striped across the two cores. When we unstripe this hardware RAID 0
+and make partitions on each new exposed device the two partitions are now
+physically separated.
+
+With the dm-unstriped target we're able to segregate an fio script that
+has read and write jobs that are independent of each other. Compared to
+when we run the test on a combined drive with partitions, we were able
+to get a 92% reduction in read latency using this device mapper target.
+
+
+Example dmsetup usage
+=====================
+
+unstriped ontop of Intel NVMe device that has 2 cores
+-----------------------------------------------------
+dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
+dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
+
+There will now be two devices that expose Intel NVMe core 0 and 1
+respectively:
+/dev/mapper/nvmset0
+/dev/mapper/nvmset1
+
+unstriped ontop of striped with 4 drives using 128K chunk size
+--------------------------------------------------------------
+dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
+dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
+dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
+dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
new file mode 100644
index 000000000000..6efabba530f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
@@ -0,0 +1,27 @@
+* ARM DynamIQ Shared Unit (DSU) Performance Monitor Unit (PMU)
+
+ARM DyanmIQ Shared Unit (DSU) integrates one or more CPU cores
+with a shared L3 memory system, control logic and external interfaces to
+form a multicore cluster. The PMU enables to gather various statistics on
+the operations of the DSU. The PMU provides independent 32bit counters that
+can count any of the supported events, along with a 64bit cycle counter.
+The PMU is accessed via CPU system registers and has no MMIO component.
+
+** DSU PMU required properties:
+
+- compatible : should be one of :
+
+ "arm,dsu-pmu"
+
+- interrupts : Exactly 1 SPI must be listed.
+
+- cpus : List of phandles for the CPUs connected to this DSU instance.
+
+
+** Example:
+
+dsu-pmu-0 {
+ compatible = "arm,dsu-pmu";
+ interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>;
+ cpus = <&cpu_0>, <&cpu_1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
new file mode 100644
index 000000000000..ee3f0ff49889
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
@@ -0,0 +1,42 @@
+* Software Delegated Exception Interface (SDEI)
+
+Firmware implementing the SDEI functions described in ARM document number
+ARM DEN 0054A ("Software Delegated Exception Interface") can be used by
+Linux to receive notification of events such as those generated by
+firmware-first error handling, or from an IRQ that has been promoted to
+a firmware-assisted NMI.
+
+The interface provides a number of API functions for registering callbacks
+and enabling/disabling events. Functions are invoked by trapping to the
+privilege level of the SDEI firmware (specified as part of the binding
+below) and passing arguments in a manner specified by the "SMC Calling
+Convention (ARM DEN 0028B):
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+Note that the immediate field of the trapping instruction must be set
+to #0.
+
+The SDEI_EVENT_REGISTER function registers a callback in the kernel
+text to handle the specified event number.
+
+The sdei node should be a child node of '/firmware' and have required
+properties:
+
+ - compatible : should contain:
+ * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x.
+
+ - method : The method of calling the SDEI firmware. Permitted
+ values are:
+ * "smc" : SMC #0, with the register assignments specified in this
+ binding.
+ * "hvc" : HVC #0, with the register assignments specified in this
+ binding.
+Example:
+ firmware {
+ sdei {
+ compatible = "arm,sdei-1.0";
+ method = "smc";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
index 51336e5fc761..35c3c3460d17 100644
--- a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
@@ -14,3 +14,22 @@ following property before the previous one:
Example:
compatible = "marvell,armada-3720-db", "marvell,armada3720", "marvell,armada3710";
+
+
+Power management
+----------------
+
+For power management (particularly DVFS and AVS), the North Bridge
+Power Management component is needed:
+
+Required properties:
+- compatible : should contain "marvell,armada-3700-nb-pm", "syscon";
+- reg : the register start and length for the North Bridge
+ Power Management
+
+Example:
+
+nb_pm: syscon@14000 {
+ compatible = "marvell,armada-3700-nb-pm", "syscon";
+ reg = <0x14000 0x60>;
+}
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
new file mode 100644
index 000000000000..cec8d5d74e26
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
@@ -0,0 +1,22 @@
+Arm TrustZone CryptoCell cryptographic engine
+
+Required properties:
+- compatible: Should be "arm,cryptocell-712-ree".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt number for the device.
+
+Optional properties:
+- interrupt-parent: The phandle for the interrupt controller that services
+ interrupts for this device.
+- clocks: Reference to the crypto engine clock.
+- dma-coherent: Present if dma operations are coherent.
+
+Examples:
+
+ arm_cc712: crypto@80000000 {
+ compatible = "arm,cryptocell-712-ree";
+ interrupt-parent = <&intc>;
+ interrupts = < 0 30 4 >;
+ reg = < 0x80000000 0x10000 >;
+
+ };
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
index fbc07d12322f..30c3ce6b502e 100644
--- a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
+++ b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
@@ -1,7 +1,8 @@
Inside Secure SafeXcel cryptographic engine
Required properties:
-- compatible: Should be "inside-secure,safexcel-eip197".
+- compatible: Should be "inside-secure,safexcel-eip197" or
+ "inside-secure,safexcel-eip97".
- reg: Base physical address of the engine and length of memory mapped region.
- interrupts: Interrupt numbers for the rings and engine.
- interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
diff --git a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
index 4ca8dd4d7e66..a13fbdb4bd88 100644
--- a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
+++ b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
@@ -2,7 +2,9 @@ Exynos Pseudo Random Number Generator
Required properties:
-- compatible : Should be "samsung,exynos4-rng".
+- compatible : One of:
+ - "samsung,exynos4-rng" for Exynos4210 and Exynos4412
+ - "samsung,exynos5250-prng" for Exynos5250+
- reg : Specifies base physical address and size of the registers map.
- clocks : Phandle to clock-controller plus clock-specifier pair.
- clock-names : "secss" as a clock name.
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
new file mode 100644
index 000000000000..970487fa40b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
@@ -0,0 +1,19 @@
+* STMicroelectronics STM32 CRYP
+
+Required properties:
+- compatible: Should be "st,stm32f756-cryp".
+- reg: The address and length of the peripheral registers space
+- clocks: The input clock of the CRYP instance
+- interrupts: The CRYP interrupt
+
+Optional properties:
+- resets: The input reset of the CRYP instance
+
+Example:
+crypto@50060000 {
+ compatible = "st,stm32f756-cryp";
+ reg = <0x50060000 0x400>;
+ interrupts = <79>;
+ clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
+ resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
+};
diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
index b3408cc57be6..1ae4748730a8 100644
--- a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
@@ -47,8 +47,8 @@ When the OS is not in control of the management interface (i.e. it's a guest),
the channel nodes appear on their own, not under a management node.
Required properties:
-- compatible: must contain "qcom,hidma-1.0" for initial HW or "qcom,hidma-1.1"
-for MSI capable HW.
+- compatible: must contain "qcom,hidma-1.0" for initial HW or
+ "qcom,hidma-1.1"/"qcom,hidma-1.2" for MSI capable HW.
- reg: Addresses for the transfer and event channel
- interrupts: Should contain the event interrupt
- desc-count: Number of asynchronous requests this channel can handle
diff --git a/Documentation/devicetree/bindings/gpio/gpio-axp209.txt b/Documentation/devicetree/bindings/gpio/gpio-axp209.txt
index a6611304dd3c..fc42b2caa06d 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-axp209.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-axp209.txt
@@ -1,10 +1,17 @@
-AXP209 GPIO controller
+AXP209 GPIO & pinctrl controller
This driver follows the usual GPIO bindings found in
Documentation/devicetree/bindings/gpio/gpio.txt
+This driver follows the usual pinctrl bindings found in
+Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+This driver employs the per-pin muxing pattern.
+
Required properties:
-- compatible: Should be "x-powers,axp209-gpio"
+- compatible: Should be one of:
+ - "x-powers,axp209-gpio"
+ - "x-powers,axp813-gpio"
- #gpio-cells: Should be two. The first cell is the pin number and the
second is the GPIO flags.
- gpio-controller: Marks the device node as a GPIO controller.
@@ -28,3 +35,41 @@ axp209: pmic@34 {
#gpio-cells = <2>;
};
};
+
+The GPIOs can be muxed to other functions and therefore, must be a subnode of
+axp_gpio.
+
+Example:
+
+&axp_gpio {
+ gpio0_adc: gpio0-adc {
+ pins = "GPIO0";
+ function = "adc";
+ };
+};
+
+&example_node {
+ pinctrl-names = "default";
+ pinctrl-0 = <&gpio0_adc>;
+};
+
+GPIOs and their functions
+-------------------------
+
+Each GPIO is independent from the other (i.e. GPIO0 in gpio_in function does
+not force GPIO1 and GPIO2 to be in gpio_in function as well).
+
+axp209
+------
+GPIO | Functions
+------------------------
+GPIO0 | gpio_in, gpio_out, ldo, adc
+GPIO1 | gpio_in, gpio_out, ldo, adc
+GPIO2 | gpio_in, gpio_out
+
+axp813
+------
+GPIO | Functions
+------------------------
+GPIO0 | gpio_in, gpio_out, ldo, adc
+GPIO1 | gpio_in, gpio_out, ldo
diff --git a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
index a7ac460ad657..9474138d776e 100644
--- a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
+++ b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
@@ -5,7 +5,7 @@ Required Properties:
- compatible: should contain one or more of the following:
- "renesas,gpio-r8a7743": for R8A7743 (RZ/G1M) compatible GPIO controller.
- "renesas,gpio-r8a7745": for R8A7745 (RZ/G1E) compatible GPIO controller.
- - "renesas,gpio-r8a7778": for R8A7778 (R-Mobile M1) compatible GPIO controller.
+ - "renesas,gpio-r8a7778": for R8A7778 (R-Car M1) compatible GPIO controller.
- "renesas,gpio-r8a7779": for R8A7779 (R-Car H1) compatible GPIO controller.
- "renesas,gpio-r8a7790": for R8A7790 (R-Car H2) compatible GPIO controller.
- "renesas,gpio-r8a7791": for R8A7791 (R-Car M2-W) compatible GPIO controller.
diff --git a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt
index 367c8203213b..3ac02988a1a5 100644
--- a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt
+++ b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt
@@ -22,8 +22,9 @@ Required properties for pwm-tacho node:
- compatible : should be "aspeed,ast2400-pwm-tacho" for AST2400 and
"aspeed,ast2500-pwm-tacho" for AST2500.
-- clocks : a fixed clock providing input clock frequency(PWM
- and Fan Tach clock)
+- clocks : phandle to clock provider with the clock number in the second cell
+
+- resets : phandle to reset controller with the reset number in the second cell
fan subnode format:
===================
@@ -48,19 +49,14 @@ Required properties for each child node:
Examples:
-pwm_tacho_fixed_clk: fixedclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <24000000>;
-};
-
pwm_tacho: pwmtachocontroller@1e786000 {
#address-cells = <1>;
#size-cells = <1>;
#cooling-cells = <2>;
reg = <0x1E786000 0x1000>;
compatible = "aspeed,ast2500-pwm-tacho";
- clocks = <&pwm_tacho_fixed_clk>;
+ clocks = <&syscon ASPEED_CLK_APB>;
+ resets = <&syscon ASPEED_RESET_PWM>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pwm0_default &pinctrl_pwm1_default>;
diff --git a/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt b/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt
index f413e82c8b83..1e6ee3deb4fa 100644
--- a/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt
@@ -15,7 +15,6 @@ Required properties:
- "clkin" for the reference clock (typically XTAL)
- "core" for the SAR ADC core clock
optional clocks:
- - "sana" for the analog clock
- "adc_clk" for the ADC (sampling) clock
- "adc_sel" for the ADC (sampling) clock mux
- vref-supply: the regulator supply for the ADC reference voltage
diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt b/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt
index 674e133b7cd7..034fc2ba100e 100644
--- a/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt
@@ -8,6 +8,7 @@ Required properties:
- reg: memory window mapping address and length
- clocks: Input clock used to derive the sample clock. Expected to be the
SoC's APB clock.
+- resets: Reset controller phandle
- #io-channel-cells: Must be set to <1> to indicate channels are selected
by index.
@@ -15,6 +16,7 @@ Example:
adc@1e6e9000 {
compatible = "aspeed,ast2400-adc";
reg = <0x1e6e9000 0xb0>;
- clocks = <&clk_apb>;
+ clocks = <&syscon ASPEED_CLK_APB>;
+ resets = <&syscon ASPEED_RESET_ADC>;
#io-channel-cells = <1>;
};
diff --git a/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt b/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
index 552e7a83951d..6469a4cd2a6d 100644
--- a/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
@@ -17,6 +17,11 @@ Required properties:
This property uses the IRQ edge types values: IRQ_TYPE_EDGE_RISING ,
IRQ_TYPE_EDGE_FALLING or IRQ_TYPE_EDGE_BOTH
+Optional properties:
+ - dmas: Phandle to dma channel for the ADC.
+ - dma-names: Must be "rx" when dmas property is being used.
+ See ../../dma/dma.txt for details.
+
Example:
adc: adc@fc030000 {
@@ -31,4 +36,6 @@ adc: adc@fc030000 {
vddana-supply = <&vdd_3v3_lp_reg>;
vref-supply = <&vdd_3v3_lp_reg>;
atmel,trigger-edge-type = <IRQ_TYPE_EDGE_BOTH>;
+ dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | AT91_XDMAC_DT_PERID(25))>;
+ dma-names = "rx";
}
diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt
new file mode 100644
index 000000000000..e9ebb8a20e0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt
@@ -0,0 +1,13 @@
+Device-Tree bindings for sigma delta modulator
+
+Required properties:
+- compatible: should be "ads1201", "sd-modulator". "sd-modulator" can be use
+ as a generic SD modulator if modulator not specified in compatible list.
+- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers".
+
+Example node:
+
+ ads1202: adc@0 {
+ compatible = "sd-modulator";
+ #io-channel-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
index 48bfcaa3ffcd..e8bb8243e92c 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
@@ -62,6 +62,15 @@ Required properties:
- st,adc-channels: List of single-ended channels muxed for this ADC.
It can have up to 16 channels on stm32f4 or 20 channels on stm32h7, numbered
from 0 to 15 or 19 (resp. for in0..in15 or in0..in19).
+- st,adc-diff-channels: List of differential channels muxed for this ADC.
+ Depending on part used, some channels can be configured as differential
+ instead of single-ended (e.g. stm32h7). List here positive and negative
+ inputs pairs as <vinp vinn>, <vinp vinn>,... vinp and vinn are numbered
+ from 0 to 19 on stm32h7)
+ Note: At least one of "st,adc-channels" or "st,adc-diff-channels" is required.
+ Both properties can be used together. Some channels can be used as
+ single-ended and some other ones as differential (mixed). But channels
+ can't be configured both as single-ended and differential (invalid).
- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers" in
Documentation/devicetree/bindings/iio/iio-bindings.txt
@@ -111,3 +120,18 @@ Example:
...
other adc child nodes follow...
};
+
+Example to setup:
+- channel 1 as single-ended
+- channels 2 & 3 as differential (with resp. 6 & 7 negative inputs)
+
+ adc: adc@40022000 {
+ compatible = "st,stm32h7-adc-core";
+ ...
+ adc1: adc@0 {
+ compatible = "st,stm32h7-adc";
+ ...
+ st,adc-channels = <1>;
+ st,adc-diff-channels = <2 6>, <3 7>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt
new file mode 100644
index 000000000000..911492da48f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt
@@ -0,0 +1,128 @@
+STMicroelectronics STM32 DFSDM ADC device driver
+
+
+STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to
+interface external sigma delta modulators to STM32 micro controllers.
+It is mainly targeted for:
+- Sigma delta modulators (motor control, metering...)
+- PDM microphones (audio digital microphone)
+
+It features up to 8 serial digital interfaces (SPI or Manchester) and
+up to 4 filters on stm32h7.
+
+Each child node match with a filter instance.
+
+Contents of a STM32 DFSDM root node:
+------------------------------------
+Required properties:
+- compatible: Should be "st,stm32h7-dfsdm".
+- reg: Offset and length of the DFSDM block register set.
+- clocks: IP and serial interfaces clocking. Should be set according
+ to rcc clock ID and "clock-names".
+- clock-names: Input clock name "dfsdm" must be defined,
+ "audio" is optional. If defined CLKOUT is based on the audio
+ clock, else "dfsdm" is used.
+- #interrupt-cells = <1>;
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties:
+- spi-max-frequency: Requested only for SPI master mode.
+ SPI clock OUT frequency (Hz). This clock must be set according
+ to "clock" property. Frequency must be a multiple of the rcc
+ clock frequency. If not, SPI CLKOUT frequency will not be
+ accurate.
+
+Contents of a STM32 DFSDM child nodes:
+--------------------------------------
+
+Required properties:
+- compatible: Must be:
+ "st,stm32-dfsdm-adc" for sigma delta ADCs
+ "st,stm32-dfsdm-dmic" for audio digital microphone.
+- reg: Specifies the DFSDM filter instance used.
+- interrupts: IRQ lines connected to each DFSDM filter instance.
+- st,adc-channels: List of single-ended channels muxed for this ADC.
+ valid values:
+ "st,stm32h7-dfsdm" compatibility: 0 to 7.
+- st,adc-channel-names: List of single-ended channel names.
+- st,filter-order: SinC filter order from 0 to 5.
+ 0: FastSinC
+ [1-5]: order 1 to 5.
+ For audio purpose it is recommended to use order 3 to 5.
+- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers".
+
+Required properties for "st,stm32-dfsdm-adc" compatibility:
+- io-channels: From common IIO binding. Used to pipe external sigma delta
+ modulator or internal ADC output to DFSDM channel.
+ This is not required for "st,stm32-dfsdm-pdm" compatibility as
+ PDM microphone is binded in Audio DT node.
+
+Required properties for "st,stm32-dfsdm-pdm" compatibility:
+- #sound-dai-cells: Must be set to 0.
+- dma: DMA controller phandle and DMA request line associated to the
+ filter instance (specified by the field "reg")
+- dma-names: Must be "rx"
+
+Optional properties:
+- st,adc-channel-types: Single-ended channel input type.
+ - "SPI_R": SPI with data on rising edge (default)
+ - "SPI_F": SPI with data on falling edge
+ - "MANCH_R": manchester codec, rising edge = logic 0
+ - "MANCH_F": manchester codec, falling edge = logic 1
+- st,adc-channel-clk-src: Conversion clock source.
+ - "CLKIN": external SPI clock (CLKIN x)
+ - "CLKOUT": internal SPI clock (CLKOUT) (default)
+ - "CLKOUT_F": internal SPI clock divided by 2 (falling edge).
+ - "CLKOUT_R": internal SPI clock divided by 2 (rising edge).
+
+- st,adc-alt-channel: Must be defined if two sigma delta modulator are
+ connected on same SPI input.
+ If not set, channel n is connected to SPI input n.
+ If set, channel n is connected to SPI input n + 1.
+
+- st,filter0-sync: Set to 1 to synchronize with DFSDM filter instance 0.
+ Used for multi microphones synchronization.
+
+Example of a sigma delta adc connected on DFSDM SPI port 0
+and a pdm microphone connected on DFSDM SPI port 1:
+
+ ads1202: simple_sd_adc@0 {
+ compatible = "ads1202";
+ #io-channel-cells = <1>;
+ };
+
+ dfsdm: dfsdm@40017000 {
+ compatible = "st,stm32h7-dfsdm";
+ reg = <0x40017000 0x400>;
+ clocks = <&rcc DFSDM1_CK>;
+ clock-names = "dfsdm";
+ #interrupt-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dfsdm_adc0: filter@0 {
+ compatible = "st,stm32-dfsdm-adc";
+ #io-channel-cells = <1>;
+ reg = <0>;
+ interrupts = <110>;
+ st,adc-channels = <0>;
+ st,adc-channel-names = "sd_adc0";
+ st,adc-channel-types = "SPI_F";
+ st,adc-channel-clk-src = "CLKOUT";
+ io-channels = <&ads1202 0>;
+ st,filter-order = <3>;
+ };
+ dfsdm_pdm1: filter@1 {
+ compatible = "st,stm32-dfsdm-dmic";
+ reg = <1>;
+ interrupts = <111>;
+ dmas = <&dmamux1 102 0x400 0x00>;
+ dma-names = "rx";
+ st,adc-channels = <1>;
+ st,adc-channel-names = "dmic1";
+ st,adc-channel-types = "SPI_R";
+ st,adc-channel-clk-src = "CLKOUT";
+ st,filter-order = <5>;
+ };
+ }
diff --git a/Documentation/devicetree/bindings/iio/health/max30102.txt b/Documentation/devicetree/bindings/iio/health/max30102.txt
index 8629c18b0e78..ef2ca0a0306f 100644
--- a/Documentation/devicetree/bindings/iio/health/max30102.txt
+++ b/Documentation/devicetree/bindings/iio/health/max30102.txt
@@ -1,9 +1,11 @@
Maxim MAX30102 heart rate and pulse oximeter sensor
+Maxim MAX30105 optical particle-sensing module
* https://datasheets.maximintegrated.com/en/ds/MAX30102.pdf
+* https://datasheets.maximintegrated.com/en/ds/MAX30105.pdf
Required properties:
- - compatible: must be "maxim,max30102"
+ - compatible: must be "maxim,max30102" or "maxim,max30105"
- reg: the I2C address of the sensor
- interrupt-parent: should be the phandle for the interrupt controller
- interrupts: the sole interrupt generated by the device
@@ -12,8 +14,10 @@ Required properties:
interrupt client node bindings.
Optional properties:
- - maxim,red-led-current-microamp: configuration for RED LED current
+ - maxim,red-led-current-microamp: configuration for red LED current
- maxim,ir-led-current-microamp: configuration for IR LED current
+ - maxim,green-led-current-microamp: configuration for green LED current
+ (max30105 only)
Note that each step is approximately 200 microamps, ranging from 0 uA to
50800 uA.
diff --git a/Documentation/devicetree/bindings/iio/light/uvis25.txt b/Documentation/devicetree/bindings/iio/light/uvis25.txt
new file mode 100644
index 000000000000..3041207e3f3c
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/uvis25.txt
@@ -0,0 +1,23 @@
+* ST UVIS25 uv sensor
+
+Required properties:
+- compatible: should be "st,uvis25"
+- reg: i2c address of the sensor / spi cs line
+
+Optional properties:
+- interrupt-parent: should be the phandle for the interrupt controller
+- interrupts: interrupt mapping for IRQ. It should be configured with
+ flags IRQ_TYPE_LEVEL_HIGH, IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_LOW or
+ IRQ_TYPE_EDGE_FALLING.
+
+ Refer to interrupt-controller/interrupts.txt for generic interrupt
+ client node bindings.
+
+Example:
+
+uvis25@47 {
+ compatible = "st,uvis25";
+ reg = <0x47>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+};
diff --git a/Documentation/devicetree/bindings/input/hid-over-i2c.txt b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
index 28e8bd8b7d64..4d3da9d91de4 100644
--- a/Documentation/devicetree/bindings/input/hid-over-i2c.txt
+++ b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
@@ -31,7 +31,7 @@ device-specific compatible properties, which should be used in addition to the
- vdd-supply: phandle of the regulator that provides the supply voltage.
- post-power-on-delay-ms: time required by the device after enabling its regulators
- before it is ready for communication. Must be used with 'vdd-supply'.
+ or powering it on, before it is ready for communication.
Example:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt
index f320dcd6e69b..8ced1696c325 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt
@@ -12,7 +12,7 @@ Required properties:
registers
- interrupt-controller: Identifies the node as an interrupt controller
- #interrupt-cells: Specifies the number of cells needed to encode an
- interrupt source. The value shall be 1
+ interrupt source. The value shall be 2
Please refer to interrupts.txt in this directory for details of the common
Interrupt Controllers bindings used by client devices.
@@ -32,6 +32,6 @@ local_intc: local_intc {
compatible = "brcm,bcm2836-l1-intc";
reg = <0x40000000 0x100>;
interrupt-controller;
- #interrupt-cells = <1>;
+ #interrupt-cells = <2>;
interrupt-parent = <&local_intc>;
};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt
new file mode 100644
index 000000000000..35f752706e7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt
@@ -0,0 +1,30 @@
+Android Goldfish PIC
+
+Android Goldfish programmable interrupt device used by Android
+emulator.
+
+Required properties:
+
+- compatible : should contain "google,goldfish-pic"
+- reg : <registers mapping>
+- interrupts : <interrupt mapping>
+
+Example for mips when used in cascade mode:
+
+ cpuintc {
+ #interrupt-cells = <0x1>;
+ #address-cells = <0>;
+ interrupt-controller;
+ compatible = "mti,cpu-interrupt-controller";
+ };
+
+ interrupt-controller@1f000000 {
+ compatible = "google,goldfish-pic";
+ reg = <0x1f000000 0x1000>;
+
+ interrupt-controller;
+ #interrupt-cells = <0x1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <0x2>;
+ };
diff --git a/Documentation/devicetree/bindings/leds/leds-lm3692x.txt b/Documentation/devicetree/bindings/leds/leds-lm3692x.txt
new file mode 100644
index 000000000000..6c9074f84a51
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm3692x.txt
@@ -0,0 +1,49 @@
+* Texas Instruments - LM3692x Highly Efficient White LED Driver
+
+The LM3692x is an ultra-compact, highly efficient,
+white-LED driver designed for LCD display backlighting.
+
+The main difference between the LM36922 and LM36923 is the number of
+LED strings it supports. The LM36922 supports two strings while the LM36923
+supports three strings.
+
+Required properties:
+ - compatible:
+ "ti,lm36922"
+ "ti,lm36923"
+ - reg : I2C slave address
+ - #address-cells : 1
+ - #size-cells : 0
+
+Optional properties:
+ - enable-gpios : gpio pin to enable/disable the device.
+ - vled-supply : LED supply
+
+Required child properties:
+ - reg : 0
+
+Optional child properties:
+ - label : see Documentation/devicetree/bindings/leds/common.txt
+ - linux,default-trigger :
+ see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+led-controller@36 {
+ compatible = "ti,lm3692x";
+ reg = <0x36>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ vled-supply = <&vbatt>;
+
+ led@0 {
+ reg = <0>;
+ label = "white:backlight_cluster";
+ linux,default-trigger = "backlight";
+ };
+}
+
+For more product information please see the link below:
+http://www.ti.com/lit/ds/snvsa29/snvsa29.pdf
diff --git a/Documentation/devicetree/bindings/leds/leds-lp8860.txt b/Documentation/devicetree/bindings/leds/leds-lp8860.txt
index aad38dd94d4b..5f0e892ad759 100644
--- a/Documentation/devicetree/bindings/leds/leds-lp8860.txt
+++ b/Documentation/devicetree/bindings/leds/leds-lp8860.txt
@@ -6,23 +6,39 @@ current sinks that can be controlled by a PWM input
signal, a SPI/I2C master, or both.
Required properties:
- - compatible:
+ - compatible :
"ti,lp8860"
- - reg - I2C slave address
- - label - Used for naming LEDs
+ - reg : I2C slave address
+ - #address-cells : 1
+ - #size-cells : 0
Optional properties:
- - enable-gpio - gpio pin to enable/disable the device.
- - supply - "vled" - LED supply
+ - enable-gpios : gpio pin to enable (active high)/disable the device.
+ - vled-supply : LED supply
+
+Required child properties:
+ - reg : 0
+
+Optional child properties:
+ - label : see Documentation/devicetree/bindings/leds/common.txt
+ - linux,default-trigger :
+ see Documentation/devicetree/bindings/leds/common.txt
Example:
-leds: leds@6 {
+led-controller@2d {
compatible = "ti,lp8860";
+ #address-cells = <1>;
+ #size-cells = <0>;
reg = <0x2d>;
- label = "display_cluster";
- enable-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
vled-supply = <&vbatt>;
+
+ led@0 {
+ reg = <0>;
+ label = "white:backlight";
+ linux,default-trigger = "backlight";
+ };
}
For more product information please see the link below:
diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
index ac235fe385fc..8261ea73278a 100644
--- a/Documentation/devicetree/bindings/mfd/mc13xxx.txt
+++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
@@ -130,7 +130,7 @@ ecspi@70010000 { /* ECSPI1 */
#size-cells = <0>;
led-control = <0x000 0x000 0x0e0 0x000>;
- sysled {
+ sysled@3 {
reg = <3>;
label = "system:red:live";
linux,default-trigger = "heartbeat";
diff --git a/Documentation/devicetree/bindings/mfd/syscon.txt b/Documentation/devicetree/bindings/mfd/syscon.txt
index 8b92d4576c42..25d9e9c2fd53 100644
--- a/Documentation/devicetree/bindings/mfd/syscon.txt
+++ b/Documentation/devicetree/bindings/mfd/syscon.txt
@@ -16,9 +16,17 @@ Required properties:
Optional property:
- reg-io-width: the size (in bytes) of the IO accesses that should be
performed on the device.
+- hwlocks: reference to a phandle of a hardware spinlock provider node.
Examples:
gpr: iomuxc-gpr@20e0000 {
compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
reg = <0x020e0000 0x38>;
+ hwlocks = <&hwlock1 1>;
+};
+
+hwlock1: hwspinlock@40500000 {
+ ...
+ reg = <0x40500000 0x1000>;
+ #hwlock-cells = <1>;
};
diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
index 72d2a734ab85..9b8017670870 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
@@ -12,6 +12,8 @@ Required properties:
"mediatek,mt8173-mmc": for mmc host ip compatible with mt8173
"mediatek,mt2701-mmc": for mmc host ip compatible with mt2701
"mediatek,mt2712-mmc": for mmc host ip compatible with mt2712
+ "mediatek,mt7623-mmc", "mediatek,mt2701-mmc": for MT7623 SoC
+
- reg: physical base address of the controller and length
- interrupts: Should contain MSDC interrupt number
- clocks: Should contain phandle for the clock feeding the MMC controller
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 3c6762430fd9..d8685cb83325 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -26,6 +26,7 @@ Required properties:
"renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
"renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC
"renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC
+ "renesas,sdhi-r8a77995" - SDHI IP on R8A77995 SoC
"renesas,sdhi-shmobile" - a generic sh-mobile SDHI controller
"renesas,rcar-gen1-sdhi" - a generic R-Car Gen1 SDHI controller
"renesas,rcar-gen2-sdhi" - a generic R-Car Gen2 or RZ/G1
diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
index c34aa6f8a424..63d4d626fbd5 100644
--- a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
+++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
@@ -12,7 +12,7 @@ Required properties:
- reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory"
- interrupts : Should contain the interrupt for the device
- clocks : The clocks needed by the QuadSPI controller
- - clock-names : the name of the clocks
+ - clock-names : Should contain the name of the clocks: "qspi_en" and "qspi".
Optional properties:
- fsl,qspi-has-second-chip: The controller has two buses, bus A and bus B.
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
index b6e8bfd024f4..e9f01a963a0a 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
@@ -9,13 +9,14 @@ Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
Required properties:
+ - compatible: "ti,omap2-onenand"
- reg: The CS line the peripheral is connected to
- - gpmc,device-width Width of the ONENAND device connected to the GPMC
+ - gpmc,device-width: Width of the ONENAND device connected to the GPMC
in bytes. Must be 1 or 2.
Optional properties:
- - dma-channel: DMA Channel index
+ - int-gpios: GPIO specifier for the INT pin.
For inline partition table parsing (optional):
@@ -35,6 +36,7 @@ Example for an OMAP3430 board:
#size-cells = <1>;
onenand@0 {
+ compatible = "ti,omap2-onenand";
reg = <0 0 0>; /* CS0, offset 0 */
gpmc,device-width = <2>;
diff --git a/Documentation/devicetree/bindings/mtd/marvell-nand.txt b/Documentation/devicetree/bindings/mtd/marvell-nand.txt
new file mode 100644
index 000000000000..c08fb477b3c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/marvell-nand.txt
@@ -0,0 +1,123 @@
+Marvell NAND Flash Controller (NFC)
+
+Required properties:
+- compatible: can be one of the following:
+ * "marvell,armada-8k-nand-controller"
+ * "marvell,armada370-nand-controller"
+ * "marvell,pxa3xx-nand-controller"
+ * "marvell,armada-8k-nand" (deprecated)
+ * "marvell,armada370-nand" (deprecated)
+ * "marvell,pxa3xx-nand" (deprecated)
+ Compatibles marked deprecated support only the old bindings described
+ at the bottom.
+- reg: NAND flash controller memory area.
+- #address-cells: shall be set to 1. Encode the NAND CS.
+- #size-cells: shall be set to 0.
+- interrupts: shall define the NAND controller interrupt.
+- clocks: shall reference the NAND controller clock.
+- marvell,system-controller: Set to retrieve the syscon node that handles
+ NAND controller related registers (only required with the
+ "marvell,armada-8k-nand[-controller]" compatibles).
+
+Optional properties:
+- label: see partition.txt. New platforms shall omit this property.
+- dmas: shall reference DMA channel associated to the NAND controller.
+ This property is only used with "marvell,pxa3xx-nand[-controller]"
+ compatible strings.
+- dma-names: shall be "rxtx".
+ This property is only used with "marvell,pxa3xx-nand[-controller]"
+ compatible strings.
+
+Optional children nodes:
+Children nodes represent the available NAND chips.
+
+Required properties:
+- reg: shall contain the native Chip Select ids (0-3).
+- nand-rb: see nand.txt (0-1).
+
+Optional properties:
+- marvell,nand-keep-config: orders the driver not to take the timings
+ from the core and leaving them completely untouched. Bootloader
+ timings will then be used.
+- label: MTD name.
+- nand-on-flash-bbt: see nand.txt.
+- nand-ecc-mode: see nand.txt. Will use hardware ECC if not specified.
+- nand-ecc-algo: see nand.txt. This property is essentially useful when
+ not using hardware ECC. Howerver, it may be added when using hardware
+ ECC for clarification but will be ignored by the driver because ECC
+ mode is chosen depending on the page size and the strength required by
+ the NAND chip. This value may be overwritten with nand-ecc-strength
+ property.
+- nand-ecc-strength: see nand.txt.
+- nand-ecc-step-size: see nand.txt. Marvell's NAND flash controller does
+ use fixed strength (1-bit for Hamming, 16-bit for BCH), so the actual
+ step size will shrink or grow in order to fit the required strength.
+ Step sizes are not completely random for all and follow certain
+ patterns described in AN-379, "Marvell SoC NFC ECC".
+
+See Documentation/devicetree/bindings/mtd/nand.txt for more details on
+generic bindings.
+
+
+Example:
+nand_controller: nand-controller@d0000 {
+ compatible = "marvell,armada370-nand-controller";
+ reg = <0xd0000 0x54>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&coredivclk 0>;
+
+ nand@0 {
+ reg = <0>;
+ label = "main-storage";
+ nand-rb = <0>;
+ nand-ecc-mode = "hw";
+ marvell,nand-keep-config;
+ nand-on-flash-bbt;
+ nand-ecc-strength = <4>;
+ nand-ecc-step-size = <512>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "Rootfs";
+ reg = <0x00000000 0x40000000>;
+ };
+ };
+ };
+};
+
+
+Note on legacy bindings: One can find, in not-updated device trees,
+bindings slightly different than described above with other properties
+described below as well as the partitions node at the root of a so
+called "nand" node (without clear controller/chip separation).
+
+Legacy properties:
+- marvell,nand-enable-arbiter: To enable the arbiter, all boards blindly
+ used it, this bit was set by the bootloader for many boards and even if
+ it is marked reserved in several datasheets, it might be needed to set
+ it (otherwise it is harmless) so whether or not this property is set,
+ the bit is selected by the driver.
+- num-cs: Number of chip-select lines to use, all boards blindly set 1
+ to this and for a reason, other values would have failed. The value of
+ this property is ignored.
+
+Example:
+
+ nand0: nand@43100000 {
+ compatible = "marvell,pxa3xx-nand";
+ reg = <0x43100000 90>;
+ interrupts = <45>;
+ dmas = <&pdma 97 0>;
+ dma-names = "rxtx";
+ #address-cells = <1>;
+ marvell,nand-keep-config;
+ marvell,nand-enable-arbiter;
+ num-cs = <1>;
+ /* Partitions (optional) */
+ };
diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
index 0431841de781..1c88526dedfc 100644
--- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
@@ -12,8 +12,10 @@ tree nodes.
The first part of NFC is NAND Controller Interface (NFI) HW.
Required NFI properties:
-- compatible: Should be one of "mediatek,mt2701-nfc",
- "mediatek,mt2712-nfc".
+- compatible: Should be one of
+ "mediatek,mt2701-nfc",
+ "mediatek,mt2712-nfc",
+ "mediatek,mt7622-nfc".
- reg: Base physical address and size of NFI.
- interrupts: Interrupts of NFI.
- clocks: NFI required clocks.
@@ -142,7 +144,10 @@ Example:
==============
Required BCH properties:
-- compatible: Should be one of "mediatek,mt2701-ecc", "mediatek,mt2712-ecc".
+- compatible: Should be one of
+ "mediatek,mt2701-ecc",
+ "mediatek,mt2712-ecc",
+ "mediatek,mt7622-ecc".
- reg: Base physical address and size of ECC.
- interrupts: Interrupts of ECC.
- clocks: ECC required clocks.
diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index 133f3813719c..8bb11d809429 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -43,6 +43,7 @@ Optional NAND chip properties:
This is particularly useful when only the in-band area is
used by the upper layers, and you want to make your NAND
as reliable as possible.
+- nand-rb: shall contain the native Ready/Busy ids.
The ECC strength and ECC step size properties define the correction capability
of a controller. Together, they say a controller can correct "{strength} bit
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index 9a734d808aa7..b7336b9d6a3c 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -2,7 +2,10 @@
Required properties:
-- compatible: should be "brcm,bcm7445-switch-v4.0" or "brcm,bcm7278-switch-v4.0"
+- compatible: should be one of
+ "brcm,bcm7445-switch-v4.0"
+ "brcm,bcm7278-switch-v4.0"
+ "brcm,bcm7278-switch-v4.8"
- reg: addresses and length of the register sets for the device, must be 6
pairs of register addresses and lengths
- interrupts: interrupts for the devices, must be two interrupts
diff --git a/Documentation/devicetree/bindings/net/can/can-transceiver.txt b/Documentation/devicetree/bindings/net/can/can-transceiver.txt
new file mode 100644
index 000000000000..0011f53ff159
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/can-transceiver.txt
@@ -0,0 +1,24 @@
+Generic CAN transceiver Device Tree binding
+------------------------------
+
+CAN transceiver typically limits the max speed in standard CAN and CAN FD
+modes. Typically these limitations are static and the transceivers themselves
+provide no way to detect this limitation at runtime. For this situation,
+the "can-transceiver" node can be used.
+
+Required Properties:
+ max-bitrate: a positive non 0 value that determines the max
+ speed that CAN/CAN-FD can run. Any other value
+ will be ignored.
+
+Examples:
+
+Based on Texas Instrument's TCAN1042HGV CAN Transceiver
+
+m_can0 {
+ ....
+ can-transceiver {
+ max-bitrate = <5000000>;
+ };
+ ...
+};
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
index 56d6cc336e1c..bfc0c433654f 100644
--- a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
+++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
@@ -18,6 +18,12 @@ Optional properties:
- xceiver-supply: Regulator that powers the CAN transceiver
+- big-endian: This means the registers of FlexCAN controller are big endian.
+ This is optional property.i.e. if this property is not present in
+ device tree node then controller is assumed to be little endian.
+ if this property is present then controller is assumed to be big
+ endian.
+
Example:
can@1c000 {
diff --git a/Documentation/devicetree/bindings/net/can/m_can.txt b/Documentation/devicetree/bindings/net/can/m_can.txt
index 63e90421d029..ed614383af9c 100644
--- a/Documentation/devicetree/bindings/net/can/m_can.txt
+++ b/Documentation/devicetree/bindings/net/can/m_can.txt
@@ -43,6 +43,11 @@ Required properties:
Please refer to 2.4.1 Message RAM Configuration in
Bosch M_CAN user manual for details.
+Optional Subnode:
+- can-transceiver : Can-transceiver subnode describing maximum speed
+ that can be used for CAN/CAN-FD modes. See
+ Documentation/devicetree/bindings/net/can/can-transceiver.txt
+ for details.
Example:
SoC dtsi:
m_can1: can@20e8000 {
@@ -63,4 +68,8 @@ Board dts:
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_m_can1>;
status = "enabled";
+
+ can-transceiver {
+ max-bitrate = <5000000>;
+ };
};
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 06bb7cc334c8..94a7f33ac5e9 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -2,7 +2,9 @@ Renesas R-Car CAN controller Device Tree Bindings
-------------------------------------------------
Required properties:
-- compatible: "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
+- compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+ "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
+ "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
"renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
"renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
"renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
@@ -12,7 +14,8 @@ Required properties:
"renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
"renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC.
"renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
- "renesas,rcar-gen2-can" for a generic R-Car Gen2 compatible device.
+ "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
+ compatible device.
"renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first
diff --git a/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt b/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt
new file mode 100644
index 000000000000..6c559981d110
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt
@@ -0,0 +1,92 @@
+Cortina Systems Gemini Ethernet Controller
+==========================================
+
+This ethernet controller is found in the Gemini SoC family:
+StorLink SL3512 and SL3516, also known as Cortina Systems
+CS3512 and CS3516.
+
+Required properties:
+- compatible: must be "cortina,gemini-ethernet"
+- reg: must contain the global registers and the V-bit and A-bit
+ memory areas, in total three register sets.
+- syscon: a phandle to the system controller
+- #address-cells: must be specified, must be <1>
+- #size-cells: must be specified, must be <1>
+- ranges: should be state like this giving a 1:1 address translation
+ for the subnodes
+
+The subnodes represents the two ethernet ports in this device.
+They are not independent of each other since they share resources
+in the parent node, and are thus children.
+
+Required subnodes:
+- port0: contains the resources for ethernet port 0
+- port1: contains the resources for ethernet port 1
+
+Required subnode properties:
+- compatible: must be "cortina,gemini-ethernet-port"
+- reg: must contain two register areas: the DMA/TOE memory and
+ the GMAC memory area of the port
+- interrupts: should contain the interrupt line of the port.
+ this is nominally a level interrupt active high.
+- resets: this must provide an SoC-integrated reset line for
+ the port.
+- clocks: this should contain a handle to the PCLK clock for
+ clocking the silicon in this port
+- clock-names: must be "PCLK"
+
+Optional subnode properties:
+- phy-mode: see ethernet.txt
+- phy-handle: see ethernet.txt
+
+Example:
+
+mdio-bus {
+ (...)
+ phy0: ethernet-phy@1 {
+ reg = <1>;
+ device_type = "ethernet-phy";
+ };
+ phy1: ethernet-phy@3 {
+ reg = <3>;
+ device_type = "ethernet-phy";
+ };
+};
+
+
+ethernet@60000000 {
+ compatible = "cortina,gemini-ethernet";
+ reg = <0x60000000 0x4000>, /* Global registers, queue */
+ <0x60004000 0x2000>, /* V-bit */
+ <0x60006000 0x2000>; /* A-bit */
+ syscon = <&syscon>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gmac0: ethernet-port@0 {
+ compatible = "cortina,gemini-ethernet-port";
+ reg = <0x60008000 0x2000>, /* Port 0 DMA/TOE */
+ <0x6000a000 0x2000>; /* Port 0 GMAC */
+ interrupt-parent = <&intcon>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ resets = <&syscon GEMINI_RESET_GMAC0>;
+ clocks = <&syscon GEMINI_CLK_GATE_GMAC0>;
+ clock-names = "PCLK";
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ };
+
+ gmac1: ethernet-port@1 {
+ compatible = "cortina,gemini-ethernet-port";
+ reg = <0x6000c000 0x2000>, /* Port 1 DMA/TOE */
+ <0x6000e000 0x2000>; /* Port 1 GMAC */
+ interrupt-parent = <&intcon>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ resets = <&syscon GEMINI_RESET_GMAC1>;
+ clocks = <&syscon GEMINI_CLK_GATE_GMAC1>;
+ clock-names = "PCLK";
+ phy-mode = "rgmii";
+ phy-handle = <&phy1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index f0dc94409107..2d41fb96ce0a 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -59,7 +59,7 @@ ethernet@83fec000 {
reg = <0x83fec000 0x4000>;
interrupts = <87>;
phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
local-mac-address = [00 04 9F 01 1B B9];
phy-supply = <&reg_fec_supply>;
};
@@ -71,7 +71,7 @@ ethernet@83fec000 {
reg = <0x83fec000 0x4000>;
interrupts = <87>;
phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
local-mac-address = [00 04 9F 01 1B B9];
phy-supply = <&reg_fec_supply>;
phy-handle = <&ethphy>;
diff --git a/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
index dea5124cdc52..d24172cc6d32 100644
--- a/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
+++ b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
@@ -1,7 +1,7 @@
* ADF7242 IEEE 802.15.4 *
Required properties:
- - compatible: should be "adi,adf7242"
+ - compatible: should be "adi,adf7242", "adi,adf7241"
- spi-max-frequency: maximal bus speed (12.5 MHz)
- reg: the chipselect index
- interrupts: the interrupt generated by the device via pin IRQ1.
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 214eaa9a6683..53c13ee384a4 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -28,7 +28,7 @@ Required properties:
- mediatek,sgmiisys: phandle to the syscon node that handles the SGMII setup
which is required for those SoCs equipped with SGMII such as MT7622 SoC.
- mediatek,pctl: phandle to the syscon node that handles the ports slew rate
- and driver current
+ and driver current: only for MT2701 and MT7623 SoC
Optional properties:
- interrupt-parent: Should be the phandle for the interrupt controller
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index 77d0b2a61ffa..d2169a56f5e3 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -53,6 +53,14 @@ Optional Properties:
to ensure the integrated PHY is used. The absence of this property indicates
the muxers should be configured so that the external PHY is used.
+- reset-gpios: The GPIO phandle and specifier for the PHY reset signal.
+
+- reset-assert-us: Delay after the reset was asserted in microseconds.
+ If this property is missing the delay will be skipped.
+
+- reset-deassert-us: Delay after the reset was deasserted in microseconds.
+ If this property is missing the delay will be skipped.
+
Example:
ethernet-phy@0 {
@@ -60,4 +68,8 @@ ethernet-phy@0 {
interrupt-parent = <&PIC>;
interrupts = <35 IRQ_TYPE_EDGE_RISING>;
reg = <0>;
+
+ reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <1000>;
+ reset-deassert-us = <2000>;
};
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index 60e970ce10ee..f1c441bedf68 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -3,7 +3,9 @@ Transceiver
Required properties:
-- compatible : must be "sff,sfp"
+- compatible : must be one of
+ "sff,sfp" for SFP modules
+ "sff,sff" for soldered down SFF modules
Optional Properties:
@@ -11,7 +13,8 @@ Optional Properties:
interface
- mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS)
- module presence input gpio signal, active (module absent) high
+ module presence input gpio signal, active (module absent) high. Must
+ not be present for SFF modules
- los-gpios : GPIO phandle and a specifier of the Receiver Loss of Signal
Indication input gpio signal, active (signal lost) high
@@ -24,10 +27,11 @@ Optional Properties:
- rate-select0-gpios : GPIO phandle and a specifier of the Rx Signaling Rate
Select (AKA RS0) output gpio signal, low: low Rx rate, high: high Rx rate
+ Must not be present for SFF modules
- rate-select1-gpios : GPIO phandle and a specifier of the Tx Signaling Rate
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
- high Tx rate
+ high Tx rate. Must not be present for SFF modules
Example #1: Direct serdes to SFP connection
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
new file mode 100644
index 000000000000..270ea4efff13
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -0,0 +1,48 @@
+* Socionext AVE ethernet controller
+
+This describes the devicetree bindings for AVE ethernet controller
+implemented on Socionext UniPhier SoCs.
+
+Required properties:
+ - compatible: Should be
+ - "socionext,uniphier-pro4-ave4" : for Pro4 SoC
+ - "socionext,uniphier-pxs2-ave4" : for PXs2 SoC
+ - "socionext,uniphier-ld11-ave4" : for LD11 SoC
+ - "socionext,uniphier-ld20-ave4" : for LD20 SoC
+ - reg: Address where registers are mapped and size of region.
+ - interrupts: Should contain the MAC interrupt.
+ - phy-mode: See ethernet.txt in the same directory. Allow to choose
+ "rgmii", "rmii", or "mii" according to the PHY.
+ - phy-handle: Should point to the external phy device.
+ See ethernet.txt file in the same directory.
+ - clocks: A phandle to the clock for the MAC.
+
+Optional properties:
+ - resets: A phandle to the reset control for the MAC.
+ - local-mac-address: See ethernet.txt in the same directory.
+
+Required subnode:
+ - mdio: A container for child nodes representing phy nodes.
+ See phy.txt in the same directory.
+
+Example:
+
+ ether: ethernet@65000000 {
+ compatible = "socionext,uniphier-ld20-ave4";
+ reg = <0x65000000 0x8500>;
+ interrupts = <0 66 4>;
+ phy-mode = "rgmii";
+ phy-handle = <&ethphy>;
+ clocks = <&sys_clk 6>;
+ resets = <&sys_rst 6>;
+ local-mac-address = [00 00 00 00 00 00];
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethphy@1 {
+ reg = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/socionext-netsec.txt b/Documentation/devicetree/bindings/net/socionext-netsec.txt
new file mode 100644
index 000000000000..0cff94fb0433
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/socionext-netsec.txt
@@ -0,0 +1,53 @@
+* Socionext NetSec Ethernet Controller IP
+
+Required properties:
+- compatible: Should be "socionext,synquacer-netsec"
+- reg: Address and length of the control register area, followed by the
+ address and length of the EEPROM holding the MAC address and
+ microengine firmware
+- interrupts: Should contain ethernet controller interrupt
+- clocks: phandle to the PHY reference clock
+- clock-names: Should be "phy_ref_clk"
+- phy-mode: See ethernet.txt file in the same directory
+- phy-handle: See ethernet.txt in the same directory.
+
+- mdio device tree subnode: When the Netsec has a phy connected to its local
+ mdio, there must be device tree subnode with the following
+ required properties:
+
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
+
+ For each phy on the mdio bus, there must be a node with the following
+ fields:
+ - compatible: Refer to phy.txt
+ - reg: phy id used to communicate to phy.
+
+Optional properties: (See ethernet.txt file in the same directory)
+- dma-coherent: Boolean property, must only be present if memory
+ accesses performed by the device are cache coherent.
+- local-mac-address: See ethernet.txt in the same directory.
+- mac-address: See ethernet.txt in the same directory.
+- max-speed: See ethernet.txt in the same directory.
+- max-frame-size: See ethernet.txt in the same directory.
+
+Example:
+ eth0: ethernet@522d0000 {
+ compatible = "socionext,synquacer-netsec";
+ reg = <0 0x522d0000 0x0 0x10000>, <0 0x10000000 0x0 0x10000>;
+ interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_netsec>;
+ clock-names = "phy_ref_clk";
+ phy-mode = "rgmii";
+ max-speed = <1000>;
+ max-frame-size = <9000>;
+ phy-handle = <&phy1>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/ti,wilink-st.txt b/Documentation/devicetree/bindings/net/ti-bluetooth.txt
index 1649c1f66b07..6d03ff8c7068 100644
--- a/Documentation/devicetree/bindings/net/ti,wilink-st.txt
+++ b/Documentation/devicetree/bindings/net/ti-bluetooth.txt
@@ -1,10 +1,18 @@
-TI WiLink 7/8 (wl12xx/wl18xx) Shared Transport BT/FM/GPS devices
+Texas Instruments Bluetooth Chips
+---------------------------------
+
+This documents the binding structure and common properties for serial
+attached TI Bluetooth devices. The following chips are included in this
+binding:
+
+* TI CC256x Bluetooth devices
+* TI WiLink 7/8 (wl12xx/wl18xx) Shared Transport BT/FM/GPS devices
TI WiLink devices have a UART interface for providing Bluetooth, FM radio,
and GPS over what's called "shared transport". The shared transport is
standard BT HCI protocol with additional channels for the other functions.
-These devices also have a separate WiFi interface as described in
+TI WiLink devices also have a separate WiFi interface as described in
wireless/ti,wlcore.txt.
This bindings follows the UART slave device binding in
@@ -12,6 +20,7 @@ This bindings follows the UART slave device binding in
Required properties:
- compatible: should be one of the following:
+ "ti,cc2560"
"ti,wl1271-st"
"ti,wl1273-st"
"ti,wl1281-st"
@@ -32,6 +41,9 @@ Optional properties:
See ../clocks/clock-bindings.txt for details.
- clock-names : Must include the following entry:
"ext_clock" (External clock provided to the TI combo chip).
+ - nvmem-cells: phandle to nvmem data cell that contains a 6 byte BD address
+ with the most significant byte first (big-endian).
+ - nvmem-cell-names: "bd-address" (required when nvmem-cells is specified)
Example:
@@ -43,5 +55,7 @@ Example:
enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
clocks = <&clk32k_wl18xx>;
clock-names = "ext_clock";
+ nvmem-cells = <&bd_address>;
+ nvmem-cell-names = "bd-address";
};
};
diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
new file mode 100644
index 000000000000..0c17a0ec9b7b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
@@ -0,0 +1,32 @@
+* MediaTek mt76xx devices
+
+This node provides properties for configuring the MediaTek mt76xx wireless
+device. The node is expected to be specified as a child node of the PCI
+controller to which the wireless chip is connected.
+
+Optional properties:
+
+- mac-address: See ethernet.txt in the parent directory
+- local-mac-address: See ethernet.txt in the parent directory
+- ieee80211-freq-limit: See ieee80211.txt
+- mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data
+
+Optional nodes:
+- led: Properties for a connected LED
+ Optional properties:
+ - led-sources: See Documentation/devicetree/bindings/leds/common.txt
+
+&pcie {
+ pcie0 {
+ wifi@0,0 {
+ compatible = "mediatek,mt76";
+ reg = <0x0000 0 0 0 0>;
+ ieee80211-freq-limit = <5000000 6000000>;
+ mediatek,mtd-eeprom = <&factory 0x8000>;
+
+ led {
+ led-sources = <2>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
index 74d7f0af209c..3d2a031217da 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
@@ -41,6 +41,9 @@ Optional properties:
- qcom,msi_addr: MSI interrupt address.
- qcom,msi_base: Base value to add before writing MSI data into
MSI address register.
+- qcom,ath10k-calibration-variant: string to search for in the board-2.bin
+ variant list with the same bus and device
+ specific ids
- qcom,ath10k-calibration-data : calibration data + board specific data
as an array, the length can vary between
hw versions.
diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index 9d733af26be7..4e4f30288c8b 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -45,6 +45,11 @@ Devices supporting OPPs must set their "operating-points-v2" property with
phandle to a OPP table in their DT node. The OPP core will use this phandle to
find the operating points for the device.
+This can contain more than one phandle for power domain providers that provide
+multiple power domains. That is, one phandle for each power domain. If only one
+phandle is available, then the same OPP table will be used for all power domains
+provided by the power domain provider.
+
If required, this can be extended for SoC vendor specific bindings. Such bindings
should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
and should have a compatible description like: "operating-points-v2-<vendor>".
@@ -154,6 +159,14 @@ Optional properties:
- status: Marks the node enabled/disabled.
+- required-opp: This contains phandle to an OPP node in another device's OPP
+ table. It may contain an array of phandles, where each phandle points to an
+ OPP of a different device. It should not contain multiple phandles to the OPP
+ nodes in the same OPP table. This specifies the minimum required OPP of the
+ device(s), whose OPP's phandle is present in this property, for the
+ functioning of the current device at the current OPP (where this property is
+ present).
+
Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
/ {
diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
new file mode 100644
index 000000000000..832346e489a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
@@ -0,0 +1,63 @@
+Texas Instruments OMAP compatible OPP supply description
+
+OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which
+contain data that can be used to adjust voltages programmed for some of their
+supplies for more efficient operation. This binding provides the information
+needed to read these values and use them to program the main regulator during
+an OPP transitions.
+
+Also, some supplies may have an associated vbb-supply which is an Adaptive Body
+Bias regulator which much be transitioned in a specific sequence with regards
+to the vdd-supply and clk when making an OPP transition. By supplying two
+regulators to the device that will undergo OPP transitions we can make use
+of the multi regulator binding that is part of the OPP core described here [1]
+to describe both regulators needed by the platform.
+
+[1] Documentation/devicetree/bindings/opp/opp.txt
+
+Required Properties for Device Node:
+- vdd-supply: phandle to regulator controlling VDD supply
+- vbb-supply: phandle to regulator controlling Body Bias supply
+ (Usually Adaptive Body Bias regulator)
+
+Required Properties for opp-supply node:
+- compatible: Should be one of:
+ "ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB
+ "ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0)VDD
+ along with VBB
+ "ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD
+ but no VBB.
+- reg: Address and length of the efuse register set for the device (mandatory
+ only for "ti,omap5-opp-supply")
+- ti,efuse-settings: An array of u32 tuple items providing information about
+ optimized efuse configuration. Each item consists of the following:
+ volt: voltage in uV - reference voltage (OPP voltage)
+ efuse_offseet: efuse offset from reg where the optimized voltage is stored.
+- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply.
+
+Example:
+
+/* Device Node (CPU) */
+cpus {
+ cpu0: cpu@0 {
+ device_type = "cpu";
+
+ ...
+
+ vdd-supply = <&vcc>;
+ vbb-supply = <&abb_mpu>;
+ };
+};
+
+/* OMAP OPP Supply with Class0 registers */
+opp_supply_mpu: opp_supply@4a003b20 {
+ compatible = "ti,omap5-opp-supply";
+ reg = <0x4a003b20 0x8>;
+ ti,efuse-settings = <
+ /* uV offset */
+ 1060000 0x0
+ 1160000 0x4
+ 1210000 0x8
+ >;
+ ti,absolute-max-voltage-uv = <1500000>;
+};
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 14bd9e945ff6..f3355313c020 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -40,6 +40,12 @@ Optional properties:
domain's idle states. In the absence of this property, the domain would be
considered as capable of being powered-on or powered-off.
+- operating-points-v2 : Phandles to the OPP tables of power domains provided by
+ a power domain provider. If the provider provides a single power domain only
+ or all the power domains provided by the provider have identical OPP tables,
+ then this shall contain a single phandle. Refer to ../opp/opp.txt for more
+ information.
+
Example:
power: power-controller@12340000 {
@@ -120,4 +126,63 @@ The node above defines a typical PM domain consumer device, which is located
inside a PM domain with index 0 of a power controller represented by a node
with the label "power".
+Optional properties:
+- required-opp: This contains phandle to an OPP node in another device's OPP
+ table. It may contain an array of phandles, where each phandle points to an
+ OPP of a different device. It should not contain multiple phandles to the OPP
+ nodes in the same OPP table. This specifies the minimum required OPP of the
+ device(s), whose OPP's phandle is present in this property, for the
+ functioning of the current device at the current OPP (where this property is
+ present).
+
+Example:
+- OPP table for domain provider that provides two domains.
+
+ domain0_opp_table: opp-table0 {
+ compatible = "operating-points-v2";
+
+ domain0_opp_0: opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <975000 970000 985000>;
+ };
+ domain0_opp_1: opp-1100000000 {
+ opp-hz = /bits/ 64 <1100000000>;
+ opp-microvolt = <1000000 980000 1010000>;
+ };
+ };
+
+ domain1_opp_table: opp-table1 {
+ compatible = "operating-points-v2";
+
+ domain1_opp_0: opp-1200000000 {
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <975000 970000 985000>;
+ };
+ domain1_opp_1: opp-1300000000 {
+ opp-hz = /bits/ 64 <1300000000>;
+ opp-microvolt = <1000000 980000 1010000>;
+ };
+ };
+
+ power: power-controller@12340000 {
+ compatible = "foo,power-controller";
+ reg = <0x12340000 0x1000>;
+ #power-domain-cells = <1>;
+ operating-points-v2 = <&domain0_opp_table>, <&domain1_opp_table>;
+ };
+
+ leaky-device0@12350000 {
+ compatible = "foo,i-leak-current";
+ reg = <0x12350000 0x1000>;
+ power-domains = <&power 0>;
+ required-opp = <&domain0_opp_0>;
+ };
+
+ leaky-device1@12350000 {
+ compatible = "foo,i-leak-current";
+ reg = <0x12350000 0x1000>;
+ power-domains = <&power 1>;
+ required-opp = <&domain1_opp_1>;
+ };
+
[1]. Documentation/devicetree/bindings/power/domain-idle-state.txt
diff --git a/Documentation/devicetree/bindings/power/reset/imx-snvs-poweroff.txt b/Documentation/devicetree/bindings/power/reset/imx-snvs-poweroff.txt
deleted file mode 100644
index 1b81fcd9fb72..000000000000
--- a/Documentation/devicetree/bindings/power/reset/imx-snvs-poweroff.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-i.mx6 Poweroff Driver
-
-SNVS_LPCR in SNVS module can power off the whole system by pull
-PMIC_ON_REQ low if PMIC_ON_REQ is connected with external PMIC.
-If you don't want to use PMIC_ON_REQ as power on/off control,
-please set status='disabled' to disable this driver.
-
-Required Properties:
--compatible: "fsl,sec-v4.0-poweroff"
--reg: Specifies the physical address of the SNVS_LPCR register
-
-Example:
- snvs@20cc000 {
- compatible = "fsl,sec-v4.0-mon", "simple-bus";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x020cc000 0x4000>;
- .....
- snvs_poweroff: snvs-poweroff@38 {
- compatible = "fsl,sec-v4.0-poweroff";
- reg = <0x38 0x4>;
- };
- }
diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.txt b/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
index 6858e1a804ad..615c1cb6889f 100644
--- a/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
+++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
@@ -15,6 +15,7 @@ Required properties:
* "ti,bq27520g2" - BQ27520-g2
* "ti,bq27520g3" - BQ27520-g3
* "ti,bq27520g4" - BQ27520-g4
+ * "ti,bq27521" - BQ27521
* "ti,bq27530" - BQ27530
* "ti,bq27531" - BQ27531
* "ti,bq27541" - BQ27541
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
index 4ccb2cd5df94..d096cf461d81 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
@@ -195,4 +195,4 @@ External interrupts:
fsl,mpc5200-mscan nodes
-----------------------
-See file can.txt in this directory.
+See file Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index 3cbf56ce66ea..2babe15b618d 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -42,8 +42,16 @@ Optional properties:
- regulator-state-[mem/disk] node has following common properties:
- regulator-on-in-suspend: regulator should be on in suspend state.
- regulator-off-in-suspend: regulator should be off in suspend state.
- - regulator-suspend-microvolt: regulator should be set to this voltage
- in suspend.
+ - regulator-suspend-min-microvolt: minimum voltage may be set in
+ suspend state.
+ - regulator-suspend-max-microvolt: maximum voltage may be set in
+ suspend state.
+ - regulator-suspend-microvolt: the default voltage which regulator
+ would be set in suspend. This property is now deprecated, instead
+ setting voltage for suspend mode via the API which regulator
+ driver provides is recommended.
+ - regulator-changeable-in-suspend: whether the default voltage and
+ the regulator on/off in suspend can be changed in runtime.
- regulator-mode: operating mode in the given suspend state.
The set of possible operating modes depends on the capabilities of
every hardware so the valid modes are documented on each regulator
diff --git a/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt b/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt
new file mode 100644
index 000000000000..63dc07877cd6
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt
@@ -0,0 +1,43 @@
+Spreadtrum SC2731 Voltage regulators
+
+The SC2731 integrates low-voltage and low quiescent current DCDC/LDO.
+14 LDO and 3 DCDCs are designed for external use. All DCDCs/LDOs have
+their own bypass (power-down) control signals. External tantalum or MLCC
+ceramic capacitors are recommended to use with these LDOs.
+
+Required properties:
+ - compatible: should be "sprd,sc27xx-regulator".
+
+List of regulators provided by this controller. It is named according to
+its regulator type, BUCK_<name> and LDO_<name>. The definition for each
+of these nodes is defined using the standard binding for regulators at
+Documentation/devicetree/bindings/regulator/regulator.txt.
+
+The valid names for regulators are:
+BUCK:
+ BUCK_CPU0, BUCK_CPU1, BUCK_RF
+LDO:
+ LDO_CAMA0, LDO_CAMA1, LDO_CAMMOT, LDO_VLDO, LDO_EMMCCORE, LDO_SDCORE,
+ LDO_SDIO, LDO_WIFIPA, LDO_USB33, LDO_CAMD0, LDO_CAMD1, LDO_CON,
+ LDO_CAMIO, LDO_SRAM
+
+Example:
+ regulators {
+ compatible = "sprd,sc27xx-regulator";
+
+ vddarm0: BUCK_CPU0 {
+ regulator-name = "vddarm0";
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1996875>;
+ regulator-ramp-delay = <25000>;
+ regulator-always-on;
+ };
+
+ vddcama0: LDO_CAMA0 {
+ regulator-name = "vddcama0";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3750000>;
+ regulator-enable-ramp-delay = <100>;
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
index 26542690b578..627b29531a32 100644
--- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -1,11 +1,19 @@
-BCM2835 Random number generator
+BCM2835/6368 Random number generator
Required properties:
-- compatible : should be "brcm,bcm2835-rng" or "brcm,bcm-nsp-rng" or
- "brcm,bcm5301x-rng"
+- compatible : should be one of
+ "brcm,bcm2835-rng"
+ "brcm,bcm-nsp-rng"
+ "brcm,bcm5301x-rng" or
+ "brcm,bcm6368-rng"
- reg : Specifies base physical address and size of the registers.
+Optional properties:
+
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "ipsec" as a clock name
+
Example:
rng {
@@ -17,3 +25,11 @@ rng@18033000 {
compatible = "brcm,bcm-nsp-rng";
reg = <0x18033000 0x14>;
};
+
+random: rng@10004180 {
+ compatible = "brcm,bcm6368-rng";
+ reg = <0x10004180 0x14>;
+
+ clocks = <&periph_clk 18>;
+ clock-names = "ipsec";
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt b/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
deleted file mode 100644
index 4b5ac600bfbd..000000000000
--- a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-BCM6368 Random number generator
-
-Required properties:
-
-- compatible : should be "brcm,bcm6368-rng"
-- reg : Specifies base physical address and size of the registers
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "ipsec" as a clock name
-
-Example:
- random: rng@10004180 {
- compatible = "brcm,bcm6368-rng";
- reg = <0x10004180 0x14>;
-
- clocks = <&periph_clk 18>;
- clock-names = "ipsec";
- };
diff --git a/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt b/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
index b6a869f97715..df3bef7998fa 100644
--- a/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
+++ b/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
@@ -8,7 +8,10 @@ Main node required properties:
(b) "hisilicon,hip06-sas-v2" for v2 hw in hip06 chipset
(c) "hisilicon,hip07-sas-v2" for v2 hw in hip07 chipset
- sas-addr : array of 8 bytes for host SAS address
- - reg : Address and length of the SAS register
+ - reg : Contains two regions. The first is the address and length of the SAS
+ register. The second is the address and length of CPLD register for
+ SGPIO control. The second is optional, and should be set only when
+ we use a CPLD for directly attached disk LED control.
- hisilicon,sas-syscon: phandle of syscon used for sas control
- ctrl-reset-reg : offset to controller reset register in ctrl reg
- ctrl-reset-sts-reg : offset to controller reset status register in ctrl reg
diff --git a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
index 860a9559839a..afcfbc34e243 100644
--- a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
+++ b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
@@ -6,10 +6,10 @@ Required properties:
- interrupts : Should contain uart interrupt
Optional properties:
-- fsl,irda-mode : Indicate the uart supports irda mode
- fsl,dte-mode : Indicate the uart works in DTE mode. The uart works
in DCE mode by default.
-- rs485-rts-delay, rs485-rx-during-tx, linux,rs485-enabled-at-boot-time: see rs485.txt
+- rs485-rts-delay, rs485-rts-active-low, rs485-rx-during-tx,
+ linux,rs485-enabled-at-boot-time: see rs485.txt
Please check Documentation/devicetree/bindings/serial/serial.txt
for the complete list of generic properties.
diff --git a/Documentation/devicetree/bindings/serial/fsl-lpuart.txt b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
index 59567b51cf09..6bd3f2e93d61 100644
--- a/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
+++ b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
@@ -16,7 +16,8 @@ Required properties:
Optional properties:
- dmas: A list of two dma specifiers, one for each entry in dma-names.
- dma-names: should contain "tx" and "rx".
-- rs485-rts-delay, rs485-rx-during-tx, linux,rs485-enabled-at-boot-time: see rs485.txt
+- rs485-rts-delay, rs485-rts-active-low, rs485-rx-during-tx,
+ linux,rs485-enabled-at-boot-time: see rs485.txt
Note: Optional properties for DMA support. Write them both or both not.
diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.txt b/Documentation/devicetree/bindings/serial/ingenic,uart.txt
index 02cb7fe59cb7..c3c6406d5cfe 100644
--- a/Documentation/devicetree/bindings/serial/ingenic,uart.txt
+++ b/Documentation/devicetree/bindings/serial/ingenic,uart.txt
@@ -1,8 +1,12 @@
* Ingenic SoC UART
Required properties:
-- compatible : "ingenic,jz4740-uart", "ingenic,jz4760-uart",
- "ingenic,jz4775-uart" or "ingenic,jz4780-uart"
+- compatible : One of:
+ - "ingenic,jz4740-uart",
+ - "ingenic,jz4760-uart",
+ - "ingenic,jz4770-uart",
+ - "ingenic,jz4775-uart",
+ - "ingenic,jz4780-uart".
- reg : offset and length of the register set for the device.
- interrupts : should contain uart interrupt.
- clocks : phandles to the module & baud clocks.
diff --git a/Documentation/devicetree/bindings/serial/maxim,max310x.txt b/Documentation/devicetree/bindings/serial/maxim,max310x.txt
index 83a919c241b0..823f77dd7978 100644
--- a/Documentation/devicetree/bindings/serial/maxim,max310x.txt
+++ b/Documentation/devicetree/bindings/serial/maxim,max310x.txt
@@ -24,13 +24,27 @@ Optional properties:
1 = active low.
Example:
+
+/ {
+ clocks {
+ spi_uart_clk: osc_max14830 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <3686400>;
+ };
+
+ };
+};
+
+&spi0 {
max14830: max14830@0 {
compatible = "maxim,max14830";
reg = <0>;
- clocks = <&clk20m>;
+ clocks = <&spi_uart_clk>;
clock-names = "osc";
interrupt-parent = <&gpio3>;
- interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
gpio-controller;
#gpio-cells = <2>;
};
+};
diff --git a/Documentation/devicetree/bindings/serial/mvebu-uart.txt b/Documentation/devicetree/bindings/serial/mvebu-uart.txt
index d37fabe17bd1..2ae2fee7e023 100644
--- a/Documentation/devicetree/bindings/serial/mvebu-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mvebu-uart.txt
@@ -1,13 +1,53 @@
-* Marvell UART : Non standard UART used in some of Marvell EBU SoCs (e.g., Armada-3700)
+* Marvell UART : Non standard UART used in some of Marvell EBU SoCs
+ e.g., Armada-3700.
Required properties:
-- compatible: "marvell,armada-3700-uart"
+- compatible:
+ - "marvell,armada-3700-uart" for the standard variant of the UART
+ (32 bytes FIFO, no DMA, level interrupts, 8-bit access to the
+ FIFO, baudrate limited to 230400).
+ - "marvell,armada-3700-uart-ext" for the extended variant of the
+ UART (128 bytes FIFO, DMA, front interrupts, 8-bit or 32-bit
+ accesses to the FIFO, baudrate unlimited by the dividers).
- reg: offset and length of the register set for the device.
-- interrupts: device interrupt
+- clocks: UART reference clock used to derive the baudrate. If no clock
+ is provided (possible only with the "marvell,armada-3700-uart"
+ compatible string for backward compatibility), it will only work
+ if the baudrate was initialized by the bootloader and no baudrate
+ change will then be possible.
+- interrupts:
+ - Must contain three elements for the standard variant of the IP
+ (marvell,armada-3700-uart): "uart-sum", "uart-tx" and "uart-rx",
+ respectively the UART sum interrupt, the UART TX interrupt and
+ UART RX interrupt. A corresponding interrupt-names property must
+ be defined.
+ - Must contain two elements for the extended variant of the IP
+ (marvell,armada-3700-uart-ext): "uart-tx" and "uart-rx",
+ respectively the UART TX interrupt and the UART RX interrupt. A
+ corresponding interrupts-names property must be defined.
+ - For backward compatibility reasons, a single element interrupts
+ property is also supported for the standard variant of the IP,
+ containing only the UART sum interrupt. This form is deprecated
+ and should no longer be used.
Example:
- serial@12000 {
+ uart0: serial@12000 {
compatible = "marvell,armada-3700-uart";
reg = <0x12000 0x200>;
- interrupts = <43>;
+ clocks = <&xtalclk>;
+ interrupts =
+ <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "uart-sum", "uart-tx", "uart-rx";
+ };
+
+ uart1: serial@12200 {
+ compatible = "marvell,armada-3700-uart-ext";
+ reg = <0x12200 0x30>;
+ clocks = <&xtalclk>;
+ interrupts =
+ <GIC_SPI 30 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 31 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "uart-tx", "uart-rx";
};
diff --git a/Documentation/devicetree/bindings/serial/omap_serial.txt b/Documentation/devicetree/bindings/serial/omap_serial.txt
index 43eac675f21f..4b0f05adb228 100644
--- a/Documentation/devicetree/bindings/serial/omap_serial.txt
+++ b/Documentation/devicetree/bindings/serial/omap_serial.txt
@@ -20,6 +20,7 @@ Optional properties:
node and a DMA channel number.
- dma-names : "rx" for receive channel, "tx" for transmit channel.
- rs485-rts-delay, rs485-rx-during-tx, linux,rs485-enabled-at-boot-time: see rs485.txt
+- rs485-rts-active-high: drive RTS high when sending (default is low).
Example:
diff --git a/Documentation/devicetree/bindings/serial/rs485.txt b/Documentation/devicetree/bindings/serial/rs485.txt
index b8415936dfdb..b7c29f74ebb2 100644
--- a/Documentation/devicetree/bindings/serial/rs485.txt
+++ b/Documentation/devicetree/bindings/serial/rs485.txt
@@ -12,6 +12,7 @@ Optional properties:
* b is the delay between end of data sent and rts signal in milliseconds
it corresponds to the delay after sending data and actual release of the line.
If this property is not specified, <0 0> is assumed.
+- rs485-rts-active-low: drive RTS low when sending (default is high).
- linux,rs485-enabled-at-boot-time: empty property telling to enable the rs485
feature at boot time. It can be disabled later with proper ioctl.
- rs485-rx-during-tx: empty property that enables the receiving of data even
diff --git a/Documentation/devicetree/bindings/sound/dmic.txt b/Documentation/devicetree/bindings/sound/dmic.txt
index 54c8ef6498a8..f7bf65611453 100644
--- a/Documentation/devicetree/bindings/sound/dmic.txt
+++ b/Documentation/devicetree/bindings/sound/dmic.txt
@@ -7,10 +7,12 @@ Required properties:
Optional properties:
- dmicen-gpios: GPIO specifier for dmic to control start and stop
+ - num-channels: Number of microphones on this DAI
Example node:
dmic_codec: dmic@0 {
compatible = "dmic-codec";
dmicen-gpios = <&gpio4 3 GPIO_ACTIVE_HIGH>;
+ num-channels = <1>;
};
diff --git a/Documentation/devicetree/bindings/sound/max98373.txt b/Documentation/devicetree/bindings/sound/max98373.txt
new file mode 100644
index 000000000000..456cb1c59353
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/max98373.txt
@@ -0,0 +1,40 @@
+Maxim Integrated MAX98373 Speaker Amplifier
+
+This device supports I2C.
+
+Required properties:
+
+ - compatible : "maxim,max98373"
+
+ - reg : the I2C address of the device.
+
+Optional properties:
+
+ - maxim,vmon-slot-no : slot number used to send voltage information
+ or in inteleave mode this will be used as
+ interleave slot.
+ slot range : 0 ~ 15, Default : 0
+
+ - maxim,imon-slot-no : slot number used to send current information
+ slot range : 0 ~ 15, Default : 0
+
+ - maxim,spkfb-slot-no : slot number used to send speaker feedback information
+ slot range : 0 ~ 15, Default : 0
+
+ - maxim,interleave-mode : For cases where a single combined channel
+ for the I/V sense data is not sufficient, the device can also be configured
+ to share a single data output channel on alternating frames.
+ In this configuration, the current and voltage data will be frame interleaved
+ on a single output channel.
+ Boolean, define to enable the interleave mode, Default : false
+
+Example:
+
+codec: max98373@31 {
+ compatible = "maxim,max98373";
+ reg = <0x31>;
+ maxim,vmon-slot-no = <0>;
+ maxim,imon-slot-no = <1>;
+ maxim,spkfb-slot-no = <2>;
+ maxim,interleave-mode;
+};
diff --git a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
index 77a57f84bed4..6df87b97f7cb 100644
--- a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
+++ b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
@@ -2,153 +2,143 @@ Mediatek AFE PCM controller for mt2701
Required properties:
- compatible = "mediatek,mt2701-audio";
-- reg: register location and size
- interrupts: should contain AFE and ASYS interrupts
- interrupt-names: should be "afe" and "asys"
- power-domains: should define the power domain
+- clocks: Must contain an entry for each entry in clock-names
+ See ../clocks/clock-bindings.txt for details
- clock-names: should have these clock names:
"infra_sys_audio_clk",
"top_audio_mux1_sel",
"top_audio_mux2_sel",
- "top_audio_mux1_div",
- "top_audio_mux2_div",
- "top_audio_48k_timing",
- "top_audio_44k_timing",
- "top_audpll_mux_sel",
- "top_apll_sel",
- "top_aud1_pll_98M",
- "top_aud2_pll_90M",
- "top_hadds2_pll_98M",
- "top_hadds2_pll_294M",
- "top_audpll",
- "top_audpll_d4",
- "top_audpll_d8",
- "top_audpll_d16",
- "top_audpll_d24",
- "top_audintbus_sel",
- "clk_26m",
- "top_syspll1_d4",
- "top_aud_k1_src_sel",
- "top_aud_k2_src_sel",
- "top_aud_k3_src_sel",
- "top_aud_k4_src_sel",
- "top_aud_k5_src_sel",
- "top_aud_k6_src_sel",
- "top_aud_k1_src_div",
- "top_aud_k2_src_div",
- "top_aud_k3_src_div",
- "top_aud_k4_src_div",
- "top_aud_k5_src_div",
- "top_aud_k6_src_div",
- "top_aud_i2s1_mclk",
- "top_aud_i2s2_mclk",
- "top_aud_i2s3_mclk",
- "top_aud_i2s4_mclk",
- "top_aud_i2s5_mclk",
- "top_aud_i2s6_mclk",
- "top_asm_m_sel",
- "top_asm_h_sel",
- "top_univpll2_d4",
- "top_univpll2_d2",
- "top_syspll_d5";
+ "top_audio_a1sys_hp",
+ "top_audio_a2sys_hp",
+ "i2s0_src_sel",
+ "i2s1_src_sel",
+ "i2s2_src_sel",
+ "i2s3_src_sel",
+ "i2s0_src_div",
+ "i2s1_src_div",
+ "i2s2_src_div",
+ "i2s3_src_div",
+ "i2s0_mclk_en",
+ "i2s1_mclk_en",
+ "i2s2_mclk_en",
+ "i2s3_mclk_en",
+ "i2so0_hop_ck",
+ "i2so1_hop_ck",
+ "i2so2_hop_ck",
+ "i2so3_hop_ck",
+ "i2si0_hop_ck",
+ "i2si1_hop_ck",
+ "i2si2_hop_ck",
+ "i2si3_hop_ck",
+ "asrc0_out_ck",
+ "asrc1_out_ck",
+ "asrc2_out_ck",
+ "asrc3_out_ck",
+ "audio_afe_pd",
+ "audio_afe_conn_pd",
+ "audio_a1sys_pd",
+ "audio_a2sys_pd",
+ "audio_mrgif_pd";
+- assigned-clocks: list of input clocks and dividers for the audio system.
+ See ../clocks/clock-bindings.txt for details.
+- assigned-clocks-parents: parent of input clocks of assigned clocks.
+- assigned-clock-rates: list of clock frequencies of assigned clocks.
+
+Must be a subnode of MediaTek audsys device tree node.
+See ../arm/mediatek/mediatek,audsys.txt for details about the parent node.
Example:
- afe: mt2701-afe-pcm@11220000 {
- compatible = "mediatek,mt2701-audio";
- reg = <0 0x11220000 0 0x2000>,
- <0 0x112A0000 0 0x20000>;
- interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 132 IRQ_TYPE_LEVEL_LOW>;
- interrupt-names = "afe", "asys";
- power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>;
- clocks = <&infracfg CLK_INFRA_AUDIO>,
- <&topckgen CLK_TOP_AUD_MUX1_SEL>,
- <&topckgen CLK_TOP_AUD_MUX2_SEL>,
- <&topckgen CLK_TOP_AUD_MUX1_DIV>,
- <&topckgen CLK_TOP_AUD_MUX2_DIV>,
- <&topckgen CLK_TOP_AUD_48K_TIMING>,
- <&topckgen CLK_TOP_AUD_44K_TIMING>,
- <&topckgen CLK_TOP_AUDPLL_MUX_SEL>,
- <&topckgen CLK_TOP_APLL_SEL>,
- <&topckgen CLK_TOP_AUD1PLL_98M>,
- <&topckgen CLK_TOP_AUD2PLL_90M>,
- <&topckgen CLK_TOP_HADDS2PLL_98M>,
- <&topckgen CLK_TOP_HADDS2PLL_294M>,
- <&topckgen CLK_TOP_AUDPLL>,
- <&topckgen CLK_TOP_AUDPLL_D4>,
- <&topckgen CLK_TOP_AUDPLL_D8>,
- <&topckgen CLK_TOP_AUDPLL_D16>,
- <&topckgen CLK_TOP_AUDPLL_D24>,
- <&topckgen CLK_TOP_AUDINTBUS_SEL>,
- <&clk26m>,
- <&topckgen CLK_TOP_SYSPLL1_D4>,
- <&topckgen CLK_TOP_AUD_K1_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K2_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K3_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K4_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K5_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K6_SRC_SEL>,
- <&topckgen CLK_TOP_AUD_K1_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_K2_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_K3_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_K4_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_K5_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_K6_SRC_DIV>,
- <&topckgen CLK_TOP_AUD_I2S1_MCLK>,
- <&topckgen CLK_TOP_AUD_I2S2_MCLK>,
- <&topckgen CLK_TOP_AUD_I2S3_MCLK>,
- <&topckgen CLK_TOP_AUD_I2S4_MCLK>,
- <&topckgen CLK_TOP_AUD_I2S5_MCLK>,
- <&topckgen CLK_TOP_AUD_I2S6_MCLK>,
- <&topckgen CLK_TOP_ASM_M_SEL>,
- <&topckgen CLK_TOP_ASM_H_SEL>,
- <&topckgen CLK_TOP_UNIVPLL2_D4>,
- <&topckgen CLK_TOP_UNIVPLL2_D2>,
- <&topckgen CLK_TOP_SYSPLL_D5>;
+ audsys: audio-subsystem@11220000 {
+ compatible = "mediatek,mt2701-audsys", "syscon", "simple-mfd";
+ ...
+
+ afe: audio-controller {
+ compatible = "mediatek,mt2701-audio";
+ interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_SPI 132 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "afe", "asys";
+ power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>;
+
+ clocks = <&infracfg CLK_INFRA_AUDIO>,
+ <&topckgen CLK_TOP_AUD_MUX1_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX2_SEL>,
+ <&topckgen CLK_TOP_AUD_48K_TIMING>,
+ <&topckgen CLK_TOP_AUD_44K_TIMING>,
+ <&topckgen CLK_TOP_AUD_K1_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K2_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K3_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K4_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K1_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K2_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K3_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K4_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_I2S1_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S2_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S3_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S4_MCLK>,
+ <&audsys CLK_AUD_I2SO1>,
+ <&audsys CLK_AUD_I2SO2>,
+ <&audsys CLK_AUD_I2SO3>,
+ <&audsys CLK_AUD_I2SO4>,
+ <&audsys CLK_AUD_I2SIN1>,
+ <&audsys CLK_AUD_I2SIN2>,
+ <&audsys CLK_AUD_I2SIN3>,
+ <&audsys CLK_AUD_I2SIN4>,
+ <&audsys CLK_AUD_ASRCO1>,
+ <&audsys CLK_AUD_ASRCO2>,
+ <&audsys CLK_AUD_ASRCO3>,
+ <&audsys CLK_AUD_ASRCO4>,
+ <&audsys CLK_AUD_AFE>,
+ <&audsys CLK_AUD_AFE_CONN>,
+ <&audsys CLK_AUD_A1SYS>,
+ <&audsys CLK_AUD_A2SYS>,
+ <&audsys CLK_AUD_AFE_MRGIF>;
+
+ clock-names = "infra_sys_audio_clk",
+ "top_audio_mux1_sel",
+ "top_audio_mux2_sel",
+ "top_audio_a1sys_hp",
+ "top_audio_a2sys_hp",
+ "i2s0_src_sel",
+ "i2s1_src_sel",
+ "i2s2_src_sel",
+ "i2s3_src_sel",
+ "i2s0_src_div",
+ "i2s1_src_div",
+ "i2s2_src_div",
+ "i2s3_src_div",
+ "i2s0_mclk_en",
+ "i2s1_mclk_en",
+ "i2s2_mclk_en",
+ "i2s3_mclk_en",
+ "i2so0_hop_ck",
+ "i2so1_hop_ck",
+ "i2so2_hop_ck",
+ "i2so3_hop_ck",
+ "i2si0_hop_ck",
+ "i2si1_hop_ck",
+ "i2si2_hop_ck",
+ "i2si3_hop_ck",
+ "asrc0_out_ck",
+ "asrc1_out_ck",
+ "asrc2_out_ck",
+ "asrc3_out_ck",
+ "audio_afe_pd",
+ "audio_afe_conn_pd",
+ "audio_a1sys_pd",
+ "audio_a2sys_pd",
+ "audio_mrgif_pd";
- clock-names = "infra_sys_audio_clk",
- "top_audio_mux1_sel",
- "top_audio_mux2_sel",
- "top_audio_mux1_div",
- "top_audio_mux2_div",
- "top_audio_48k_timing",
- "top_audio_44k_timing",
- "top_audpll_mux_sel",
- "top_apll_sel",
- "top_aud1_pll_98M",
- "top_aud2_pll_90M",
- "top_hadds2_pll_98M",
- "top_hadds2_pll_294M",
- "top_audpll",
- "top_audpll_d4",
- "top_audpll_d8",
- "top_audpll_d16",
- "top_audpll_d24",
- "top_audintbus_sel",
- "clk_26m",
- "top_syspll1_d4",
- "top_aud_k1_src_sel",
- "top_aud_k2_src_sel",
- "top_aud_k3_src_sel",
- "top_aud_k4_src_sel",
- "top_aud_k5_src_sel",
- "top_aud_k6_src_sel",
- "top_aud_k1_src_div",
- "top_aud_k2_src_div",
- "top_aud_k3_src_div",
- "top_aud_k4_src_div",
- "top_aud_k5_src_div",
- "top_aud_k6_src_div",
- "top_aud_i2s1_mclk",
- "top_aud_i2s2_mclk",
- "top_aud_i2s3_mclk",
- "top_aud_i2s4_mclk",
- "top_aud_i2s5_mclk",
- "top_aud_i2s6_mclk",
- "top_asm_m_sel",
- "top_asm_h_sel",
- "top_univpll2_d4",
- "top_univpll2_d2",
- "top_syspll_d5";
+ assigned-clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX2_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX1_DIV>,
+ <&topckgen CLK_TOP_AUD_MUX2_DIV>;
+ assigned-clock-parents = <&topckgen CLK_TOP_AUD1PLL_98M>,
+ <&topckgen CLK_TOP_AUD2PLL_90M>;
+ assigned-clock-rates = <0>, <0>, <49152000>, <45158400>;
+ };
};
diff --git a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
index 601c518eddaa..4eb980bd0287 100644
--- a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
+++ b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
@@ -1,10 +1,31 @@
* Freescale MXS audio complex with SGTL5000 codec
Required properties:
-- compatible: "fsl,mxs-audio-sgtl5000"
-- model: The user-visible name of this sound complex
-- saif-controllers: The phandle list of the MXS SAIF controller
-- audio-codec: The phandle of the SGTL5000 audio codec
+- compatible : "fsl,mxs-audio-sgtl5000"
+- model : The user-visible name of this sound complex
+- saif-controllers : The phandle list of the MXS SAIF controller
+- audio-codec : The phandle of the SGTL5000 audio codec
+- audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source. Valid names could be power supplies, SGTL5000
+ pins, and the jacks on the board:
+
+ Power supplies:
+ * Mic Bias
+
+ SGTL5000 pins:
+ * MIC_IN
+ * LINE_IN
+ * HP_OUT
+ * LINE_OUT
+
+ Board connectors:
+ * Mic Jack
+ * Line In Jack
+ * Headphone Jack
+ * Line Out Jack
+ * Ext Spk
Example:
@@ -14,4 +35,8 @@ sound {
model = "imx28-evk-sgtl5000";
saif-controllers = <&saif0 &saif1>;
audio-codec = <&sgtl5000>;
+ audio-routing =
+ "MIC_IN", "Mic Jack",
+ "Mic Jack", "Mic Bias",
+ "Headphone Jack", "HP_OUT";
};
diff --git a/Documentation/devicetree/bindings/sound/nau8825.txt b/Documentation/devicetree/bindings/sound/nau8825.txt
index 2f5e973285a6..d16d96839bcb 100644
--- a/Documentation/devicetree/bindings/sound/nau8825.txt
+++ b/Documentation/devicetree/bindings/sound/nau8825.txt
@@ -69,7 +69,7 @@ Optional properties:
- nuvoton,jack-insert-debounce: number from 0 to 7 that sets debounce time to 2^(n+2) ms
- nuvoton,jack-eject-debounce: number from 0 to 7 that sets debounce time to 2^(n+2) ms
- - nuvoton,crosstalk-bypass: make crosstalk function bypass if set.
+ - nuvoton,crosstalk-enable: make crosstalk function enable if set.
- clocks: list of phandle and clock specifier pairs according to common clock bindings for the
clocks described in clock-names
@@ -98,7 +98,7 @@ Example:
nuvoton,short-key-debounce = <2>;
nuvoton,jack-insert-debounce = <7>;
nuvoton,jack-eject-debounce = <7>;
- nuvoton,crosstalk-bypass;
+ nuvoton,crosstalk-enable;
clock-names = "mclk";
clocks = <&tegra_car TEGRA210_CLK_CLK_OUT_2>;
diff --git a/Documentation/devicetree/bindings/sound/pcm186x.txt b/Documentation/devicetree/bindings/sound/pcm186x.txt
new file mode 100644
index 000000000000..1087f4855980
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/pcm186x.txt
@@ -0,0 +1,42 @@
+Texas Instruments PCM186x Universal Audio ADC
+
+These devices support both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "ti,pcm1862",
+ "ti,pcm1863",
+ "ti,pcm1864",
+ "ti,pcm1865"
+
+ - reg : The I2C address of the device for I2C, the chip select
+ number for SPI.
+
+ - avdd-supply: Analog core power supply (3.3v)
+ - dvdd-supply: Digital core power supply
+ - iovdd-supply: Digital IO power supply
+ See regulator/regulator.txt for more information
+
+CODEC input pins:
+ * VINL1
+ * VINR1
+ * VINL2
+ * VINR2
+ * VINL3
+ * VINR3
+ * VINL4
+ * VINR4
+
+The pins can be used in referring sound node's audio-routing property.
+
+Example:
+
+ pcm186x: audio-codec@4a {
+ compatible = "ti,pcm1865";
+ reg = <0x4a>;
+
+ avdd-supply = <&reg_3v3_analog>;
+ dvdd-supply = <&reg_3v3>;
+ iovdd-supply = <&reg_1v8>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
index 085bec364caf..5bed9a595772 100644
--- a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
+++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
@@ -4,7 +4,7 @@ Renesas R-Car sound
* Modules
=============================================
-Renesas R-Car sound is constructed from below modules
+Renesas R-Car and RZ/G sound is constructed from below modules
(for Gen2 or later)
SCU : Sampling Rate Converter Unit
@@ -197,12 +197,17 @@ Ex)
[MEM] -> [SRC2] -> [CTU03] -+
sound {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
compatible = "simple-scu-audio-card";
...
- simple-audio-card,cpu-0 {
+ simple-audio-card,cpu@0 {
+ reg = <0>;
sound-dai = <&rcar_sound 0>;
};
- simple-audio-card,cpu-1 {
+ simple-audio-card,cpu@1 {
+ reg = <1>;
sound-dai = <&rcar_sound 1>;
};
simple-audio-card,codec {
@@ -334,9 +339,11 @@ Required properties:
- compatible : "renesas,rcar_sound-<soctype>", fallbacks
"renesas,rcar_sound-gen1" if generation1, and
- "renesas,rcar_sound-gen2" if generation2
+ "renesas,rcar_sound-gen2" if generation2 (or RZ/G1)
"renesas,rcar_sound-gen3" if generation3
Examples with soctypes are:
+ - "renesas,rcar_sound-r8a7743" (RZ/G1M)
+ - "renesas,rcar_sound-r8a7745" (RZ/G1E)
- "renesas,rcar_sound-r8a7778" (R-Car M1A)
- "renesas,rcar_sound-r8a7779" (R-Car H1)
- "renesas,rcar_sound-r8a7790" (R-Car H2)
diff --git a/Documentation/devicetree/bindings/sound/simple-card.txt b/Documentation/devicetree/bindings/sound/simple-card.txt
index 166f2290233b..17c13e74667d 100644
--- a/Documentation/devicetree/bindings/sound/simple-card.txt
+++ b/Documentation/devicetree/bindings/sound/simple-card.txt
@@ -140,6 +140,7 @@ sound {
simple-audio-card,name = "Cubox Audio";
simple-audio-card,dai-link@0 { /* I2S - HDMI */
+ reg = <0>;
format = "i2s";
cpu {
sound-dai = <&audio1 0>;
@@ -150,6 +151,7 @@ sound {
};
simple-audio-card,dai-link@1 { /* S/PDIF - HDMI */
+ reg = <1>;
cpu {
sound-dai = <&audio1 1>;
};
@@ -159,6 +161,7 @@ sound {
};
simple-audio-card,dai-link@2 { /* S/PDIF - S/PDIF */
+ reg = <2>;
cpu {
sound-dai = <&audio1 1>;
};
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt b/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt
new file mode 100644
index 000000000000..864f5b00b031
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt
@@ -0,0 +1,63 @@
+STMicroelectronics Audio Digital Filter Sigma Delta modulators(DFSDM)
+
+The DFSDM allows PDM microphones capture through SPI interface. The Audio
+interface is seems as a sub block of the DFSDM device.
+For details on DFSDM bindings refer to ../iio/adc/st,stm32-dfsdm-adc.txt
+
+Required properties:
+ - compatible: "st,stm32h7-dfsdm-dai".
+
+ - #sound-dai-cells : Must be equal to 0
+
+ - io-channels : phandle to iio dfsdm instance node.
+
+Example of a sound card using audio DFSDM node.
+
+ sound_card {
+ compatible = "audio-graph-card";
+
+ dais = <&cpu_port>;
+ };
+
+ dfsdm: dfsdm@40017000 {
+ compatible = "st,stm32h7-dfsdm";
+ reg = <0x40017000 0x400>;
+ clocks = <&rcc DFSDM1_CK>;
+ clock-names = "dfsdm";
+ #interrupt-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dfsdm_adc0: filter@0 {
+ compatible = "st,stm32-dfsdm-dmic";
+ reg = <0>;
+ interrupts = <110>;
+ dmas = <&dmamux1 101 0x400 0x00>;
+ dma-names = "rx";
+ st,adc-channels = <1>;
+ st,adc-channel-names = "dmic0";
+ st,adc-channel-types = "SPI_R";
+ st,adc-channel-clk-src = "CLKOUT";
+ st,filter-order = <5>;
+
+ dfsdm_dai0: dfsdm-dai {
+ compatible = "st,stm32h7-dfsdm-dai";
+ #sound-dai-cells = <0>;
+ io-channels = <&dfsdm_adc0 0>;
+ cpu_port: port {
+ dfsdm_endpoint: endpoint {
+ remote-endpoint = <&dmic0_endpoint>;
+ };
+ };
+ };
+ };
+
+ dmic0: dmic@0 {
+ compatible = "dmic-codec";
+ #sound-dai-cells = <0>;
+ port {
+ dmic0_endpoint: endpoint {
+ remote-endpoint = <&dfsdm_endpoint>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
index 1f9cd7095337..b1acc1a256ba 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
@@ -20,11 +20,6 @@ Required properties:
Optional properties:
- resets: Reference to a reset controller asserting the SAI
- - st,sync: specify synchronization mode.
- By default SAI sub-block is in asynchronous mode.
- This property sets SAI sub-block as slave of another SAI sub-block.
- Must contain the phandle and index of the sai sub-block providing
- the synchronization.
SAI subnodes:
Two subnodes corresponding to SAI sub-block instances A et B can be defined.
@@ -44,6 +39,13 @@ SAI subnodes required properties:
- pinctrl-names: should contain only value "default"
- pinctrl-0: see Documentation/devicetree/bindings/pinctrl/pinctrl-stm32.txt
+SAI subnodes Optional properties:
+ - st,sync: specify synchronization mode.
+ By default SAI sub-block is in asynchronous mode.
+ This property sets SAI sub-block as slave of another SAI sub-block.
+ Must contain the phandle and index of the sai sub-block providing
+ the synchronization.
+
The device node should contain one 'port' child node with one child 'endpoint'
node, according to the bindings defined in Documentation/devicetree/bindings/
graph.txt.
diff --git a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt b/Documentation/devicetree/bindings/sound/sun4i-i2s.txt
index 05d7135a8d2f..b9d50d6cdef3 100644
--- a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt
+++ b/Documentation/devicetree/bindings/sound/sun4i-i2s.txt
@@ -8,6 +8,7 @@ Required properties:
- compatible: should be one of the following:
- "allwinner,sun4i-a10-i2s"
- "allwinner,sun6i-a31-i2s"
+ - "allwinner,sun8i-a83t-i2s"
- "allwinner,sun8i-h3-i2s"
- reg: physical base address of the controller and length of memory mapped
region.
@@ -23,6 +24,7 @@ Required properties:
Required properties for the following compatibles:
- "allwinner,sun6i-a31-i2s"
+ - "allwinner,sun8i-a83t-i2s"
- "allwinner,sun8i-h3-i2s"
- resets: phandle to the reset line for this codec
diff --git a/Documentation/devicetree/bindings/sound/tas5720.txt b/Documentation/devicetree/bindings/sound/tas5720.txt
index 40d94f82beb3..7481653fe8e3 100644
--- a/Documentation/devicetree/bindings/sound/tas5720.txt
+++ b/Documentation/devicetree/bindings/sound/tas5720.txt
@@ -6,10 +6,12 @@ audio playback. For more product information please see the links below:
http://www.ti.com/product/TAS5720L
http://www.ti.com/product/TAS5720M
+http://www.ti.com/product/TAS5722L
Required properties:
-- compatible : "ti,tas5720"
+- compatible : "ti,tas5720",
+ "ti,tas5722"
- reg : I2C slave address
- dvdd-supply : phandle to a 3.3-V supply for the digital circuitry
- pvdd-supply : phandle to a supply used for the Class-D amp and the analog
diff --git a/Documentation/devicetree/bindings/sound/tfa9879.txt b/Documentation/devicetree/bindings/sound/tfa9879.txt
index 23ba522d9e2b..1620e6848436 100644
--- a/Documentation/devicetree/bindings/sound/tfa9879.txt
+++ b/Documentation/devicetree/bindings/sound/tfa9879.txt
@@ -6,18 +6,18 @@ Required properties:
- reg : the I2C address of the device
+- #sound-dai-cells : must be 0.
+
Example:
&i2c1 {
- clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c1>;
- status = "okay";
- codec: tfa9879@6c {
+ amp: amp@6c {
#sound-dai-cells = <0>;
compatible = "nxp,tfa9879";
reg = <0x6c>;
- };
+ };
};
diff --git a/Documentation/devicetree/bindings/sound/ti,tas6424.txt b/Documentation/devicetree/bindings/sound/ti,tas6424.txt
new file mode 100644
index 000000000000..1c4ada0eef4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ti,tas6424.txt
@@ -0,0 +1,20 @@
+Texas Instruments TAS6424 Quad-Channel Audio amplifier
+
+The TAS6424 serial control bus communicates through I2C protocols.
+
+Required properties:
+ - compatible: "ti,tas6424" - TAS6424
+ - reg: I2C slave address
+ - sound-dai-cells: must be equal to 0
+
+Example:
+
+tas6424: tas6424@6a {
+ compatible = "ti,tas6424";
+ reg = <0x6a>;
+
+ #sound-dai-cells = <0>;
+};
+
+For more product information please see the link below:
+http://www.ti.com/product/TAS6424-Q1
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
index 6fbba562eaa7..5b3c33bb99e5 100644
--- a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+++ b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
@@ -22,7 +22,7 @@ Required properties:
Optional properties:
-- gpio-reset - gpio pin number used for codec reset
+- reset-gpios - GPIO specification for the active low RESET input.
- ai31xx-micbias-vg - MicBias Voltage setting
1 or MICBIAS_2_0V - MICBIAS output is powered to 2.0V
2 or MICBIAS_2_5V - MICBIAS output is powered to 2.5V
@@ -30,6 +30,10 @@ Optional properties:
If this node is not mentioned or if the value is unknown, then
micbias is set to 2.0V.
+Deprecated properties:
+
+- gpio-reset - gpio pin number used for codec reset
+
CODEC output pins:
* HPL
* HPR
@@ -48,6 +52,7 @@ CODEC input pins:
The pins can be used in referring sound node's audio-routing property.
Example:
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/sound/tlv320aic31xx-micbias.h>
tlv320aic31xx: tlv320aic31xx@18 {
@@ -56,6 +61,8 @@ tlv320aic31xx: tlv320aic31xx@18 {
ai31xx-micbias-vg = <MICBIAS_OFF>;
+ reset-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
+
HPVDD-supply = <&regulator>;
SPRVDD-supply = <&regulator>;
SPLVDD-supply = <&regulator>;
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic3x.txt b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt
index ba5b45c483f5..9796c4639262 100644
--- a/Documentation/devicetree/bindings/sound/tlv320aic3x.txt
+++ b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt
@@ -17,7 +17,7 @@ Required properties:
Optional properties:
-- gpio-reset - gpio pin number used for codec reset
+- reset-gpios - GPIO specification for the active low RESET input.
- ai3x-gpio-func - <array of 2 int> - AIC3X_GPIO1 & AIC3X_GPIO2 Functionality
- Not supported on tlv320aic3104
- ai3x-micbias-vg - MicBias Voltage required.
@@ -34,6 +34,10 @@ Optional properties:
- AVDD-supply, IOVDD-supply, DRVDD-supply, DVDD-supply : power supplies for the
device as covered in Documentation/devicetree/bindings/regulator/regulator.txt
+Deprecated properties:
+
+- gpio-reset - gpio pin number used for codec reset
+
CODEC output pins:
* LLOUT
* RLOUT
@@ -61,10 +65,14 @@ The pins can be used in referring sound node's audio-routing property.
Example:
+#include <dt-bindings/gpio/gpio.h>
+
tlv320aic3x: tlv320aic3x@1b {
compatible = "ti,tlv320aic3x";
reg = <0x1b>;
+ reset-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
+
AVDD-supply = <&regulator>;
IOVDD-supply = <&regulator>;
DRVDD-supply = <&regulator>;
diff --git a/Documentation/devicetree/bindings/sound/tscs42xx.txt b/Documentation/devicetree/bindings/sound/tscs42xx.txt
new file mode 100644
index 000000000000..2ac2f0996697
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tscs42xx.txt
@@ -0,0 +1,16 @@
+TSCS42XX Audio CODEC
+
+Required Properties:
+
+ - compatible : "tempo,tscs42A1" for analog mic
+ "tempo,tscs42A2" for digital mic
+
+ - reg : <0x71> for analog mic
+ <0x69> for digital mic
+
+Example:
+
+wookie: codec@69 {
+ compatible = "tempo,tscs42A2";
+ reg = <0x69>;
+};
diff --git a/Documentation/devicetree/bindings/sound/uniphier,evea.txt b/Documentation/devicetree/bindings/sound/uniphier,evea.txt
new file mode 100644
index 000000000000..3f31b235f18b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/uniphier,evea.txt
@@ -0,0 +1,26 @@
+Socionext EVEA - UniPhier SoC internal codec driver
+
+Required properties:
+- compatible : should be "socionext,uniphier-evea".
+- reg : offset and length of the register set for the device.
+- clock-names : should include following entries:
+ "evea", "exiv"
+- clocks : a list of phandle, should contain an entry for each
+ entries in clock-names.
+- reset-names : should include following entries:
+ "evea", "exiv", "adamv"
+- resets : a list of phandle, should contain reset entries of
+ reset-names.
+- #sound-dai-cells: should be 1.
+
+Example:
+
+ codec {
+ compatible = "socionext,uniphier-evea";
+ reg = <0x57900000 0x1000>;
+ clock-names = "evea", "exiv";
+ clocks = <&sys_clk 41>, <&sys_clk 42>;
+ reset-names = "evea", "exiv", "adamv";
+ resets = <&sys_rst 41>, <&sys_rst 42>, <&adamv_rst 0>;
+ #sound-dai-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
index bdd83959019c..80710f0f0448 100644
--- a/Documentation/devicetree/bindings/spi/sh-msiof.txt
+++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt
@@ -36,7 +36,21 @@ Required properties:
Optional properties:
- clocks : Must contain a reference to the functional clock.
-- num-cs : Total number of chip-selects (default is 1)
+- num-cs : Total number of chip selects (default is 1).
+ Up to 3 native chip selects are supported:
+ 0: MSIOF_SYNC
+ 1: MSIOF_SS1
+ 2: MSIOF_SS2
+ Hardware limitations related to chip selects:
+ - Native chip selects are always deasserted in
+ between transfers that are part of the same
+ message. Use cs-gpios to work around this.
+ - All slaves using native chip selects must use the
+ same spi-cs-high configuration. Use cs-gpios to
+ work around this.
+ - When using GPIO chip selects, at least one native
+ chip select must be left unused, as it will be
+ driven anyway.
- dmas : Must contain a list of two references to DMA
specifiers, one for transmission, and one for
reception.
diff --git a/Documentation/devicetree/bindings/spi/spi-meson.txt b/Documentation/devicetree/bindings/spi/spi-meson.txt
index 825c39cae74a..b7f5e86fed22 100644
--- a/Documentation/devicetree/bindings/spi/spi-meson.txt
+++ b/Documentation/devicetree/bindings/spi/spi-meson.txt
@@ -27,7 +27,9 @@ The Meson SPICC is generic SPI controller for general purpose Full-Duplex
communications with dedicated 16 words RX/TX PIO FIFOs.
Required properties:
- - compatible: should be "amlogic,meson-gx-spicc" on Amlogic GX SoCs.
+ - compatible: should be:
+ "amlogic,meson-gx-spicc" on Amlogic GX and compatible SoCs.
+ "amlogic,meson-axg-spicc" on Amlogic AXG and compatible SoCs
- reg: physical base address and length of the controller registers
- interrupts: The interrupt specifier
- clock-names: Must contain "core"
diff --git a/Documentation/devicetree/bindings/spi/spi-orion.txt b/Documentation/devicetree/bindings/spi/spi-orion.txt
index df8ec31f2f07..8434a65fc12a 100644
--- a/Documentation/devicetree/bindings/spi/spi-orion.txt
+++ b/Documentation/devicetree/bindings/spi/spi-orion.txt
@@ -18,8 +18,17 @@ Required properties:
The eight register sets following the control registers refer to
chip-select lines 0 through 7 respectively.
- cell-index : Which of multiple SPI controllers is this.
+- clocks : pointers to the reference clocks for this device, the first
+ one is the one used for the clock on the spi bus, the
+ second one is optional and is the clock used for the
+ functional part of the controller
+
Optional properties:
- interrupts : Is currently not used.
+- clock-names : names of used clocks, mandatory if the second clock is
+ used, the name must be "core", and "axi" (the latter
+ is only for Armada 7K/8K).
+
Example:
spi@10600 {
diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.txt b/Documentation/devicetree/bindings/spi/spi-xilinx.txt
index c7b7856bd528..7bf61efc66c8 100644
--- a/Documentation/devicetree/bindings/spi/spi-xilinx.txt
+++ b/Documentation/devicetree/bindings/spi/spi-xilinx.txt
@@ -2,7 +2,7 @@ Xilinx SPI controller Device Tree Bindings
-------------------------------------------------
Required properties:
-- compatible : Should be "xlnx,xps-spi-2.00.a" or "xlnx,xps-spi-2.00.b"
+- compatible : Should be "xlnx,xps-spi-2.00.a", "xlnx,xps-spi-2.00.b" or "xlnx,axi-quad-spi-1.00.a"
- reg : Physical base address and size of SPI registers map.
- interrupts : Property with a value describing the interrupt
number.
diff --git a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt
index e3c28da80cb2..977054f87563 100644
--- a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt
+++ b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt
@@ -2,6 +2,7 @@ Actions Semi Owl Timer
Required properties:
- compatible : "actions,s500-timer" for S500
+ "actions,s700-timer" for S700
"actions,s900-timer" for S900
- reg : Offset and length of the register set for the device.
- interrupts : Should contain the interrupts.
diff --git a/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt
new file mode 100644
index 000000000000..6d97e7d0f6e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt
@@ -0,0 +1,20 @@
+Spreadtrum timers
+
+The Spreadtrum SC9860 platform provides 3 general-purpose timers.
+These timers can support 32bit or 64bit counter, as well as supporting
+period mode or one-shot mode, and they are can be wakeup source
+during deep sleep.
+
+Required properties:
+- compatible: should be "sprd,sc9860-timer" for SC9860 platform.
+- reg: The register address of the timer device.
+- interrupts: Should contain the interrupt for the timer device.
+- clocks: The phandle to the source clock (usually a 32.768 KHz fixed clock).
+
+Example:
+ timer@40050000 {
+ compatible = "sprd,sc9860-timer";
+ reg = <0 0x40050000 0 0x20>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ext_32k>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 52fb41046b34..44e8bab159ad 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -47,6 +47,8 @@ Optional properties:
from P0 to P1/P2/P3 without delay.
- snps,dis-tx-ipgap-linecheck-quirk: when set, disable u2mac linestate check
during HS transmit.
+ - snps,dis_metastability_quirk: when set, disable metastability workaround.
+ CAUTION: use only if you are absolutely sure of it.
- snps,is-utmi-l1-suspend: true when DWC3 asserts output signal
utmi_l1_suspend_n, false when asserts utmi_sleep_n
- snps,hird-threshold: HIRD threshold
diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
index 30595964876a..88d9f4a4b280 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
@@ -35,10 +35,14 @@ Required properties:
- phys : a list of phandle + phy specifier pairs
Optional properties:
- - mediatek,wakeup-src : 1: ip sleep wakeup mode; 2: line state wakeup
- mode;
- - mediatek,syscon-wakeup : phandle to syscon used to access USB wakeup
- control register, it depends on "mediatek,wakeup-src".
+ - wakeup-source : enable USB remote wakeup;
+ - mediatek,syscon-wakeup : phandle to syscon used to access the register
+ of the USB wakeup glue layer between xHCI and SPM; it depends on
+ "wakeup-source", and has two arguments:
+ - the first one : register base address of the glue layer in syscon;
+ - the second one : hardware version of the glue layer
+ - 1 : used by mt8173 etc
+ - 2 : used by mt2712 etc
- mediatek,u3p-dis-msk : mask to disable u3ports, bit0 for u3port0,
bit1 for u3port1, ... etc;
- vbus-supply : reference to the VBUS regulator;
@@ -46,6 +50,7 @@ Optional properties:
- pinctrl-names : a pinctrl state named "default" must be defined
- pinctrl-0 : pin control group
See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+ - imod-interval-ns: default interrupt moderation interval is 5000ns
Example:
usb30: usb@11270000 {
@@ -64,8 +69,9 @@ usb30: usb@11270000 {
vusb33-supply = <&mt6397_vusb_reg>;
vbus-supply = <&usb_p1_vbus>;
usb3-lpm-capable;
- mediatek,syscon-wakeup = <&pericfg>;
- mediatek,wakeup-src = <1>;
+ mediatek,syscon-wakeup = <&pericfg 0x400 1>;
+ wakeup-source;
+ imod-interval-ns = <10000>;
};
2nd: dual-role mode with xHCI driver
diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt b/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
index b2271d8e6b50..d589a1ef96a1 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
@@ -42,9 +42,14 @@ Optional properties:
- enable-manual-drd : supports manual dual-role switch via debugfs; usually
used when receptacle is TYPE-A and also wants to support dual-role
mode.
- - mediatek,enable-wakeup : supports ip sleep wakeup used by host mode
- - mediatek,syscon-wakeup : phandle to syscon used to access USB wakeup
- control register, it depends on "mediatek,enable-wakeup".
+ - wakeup-source: enable USB remote wakeup of host mode.
+ - mediatek,syscon-wakeup : phandle to syscon used to access the register
+ of the USB wakeup glue layer between SSUSB and SPM; it depends on
+ "wakeup-source", and has two arguments:
+ - the first one : register base address of the glue layer in syscon;
+ - the second one : hardware version of the glue layer
+ - 1 : used by mt8173 etc
+ - 2 : used by mt2712 etc
- mediatek,u3p-dis-msk : mask to disable u3ports, bit0 for u3port0,
bit1 for u3port1, ... etc;
@@ -71,8 +76,8 @@ ssusb: usb@11271000 {
vbus-supply = <&usb_p0_vbus>;
extcon = <&extcon_usb>;
dr_mode = "otg";
- mediatek,enable-wakeup;
- mediatek,syscon-wakeup = <&pericfg>;
+ wakeup-source;
+ mediatek,syscon-wakeup = <&pericfg 0x400 1>;
#address-cells = <2>;
#size-cells = <2>;
ranges;
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index 47394ab788e3..d060172f1529 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -13,8 +13,10 @@ Required properties:
- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
+ - "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
- "renesas,rcar-gen3-usbhs" for R-Car Gen3 compatible device
+ - "renesas,rza1-usbhs" for RZ/A1 compatible device
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first followed
diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1b27cebb47f4..036be172b1ae 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -4,8 +4,49 @@ Usually, we only use device tree for hard wired USB device.
The reference binding doc is from:
http://www.devicetree.org/open-firmware/bindings/usb/usb-1_0.ps
+Four types of device-tree nodes are defined: "host-controller nodes"
+representing USB host controllers, "device nodes" representing USB devices,
+"interface nodes" representing USB interfaces and "combined nodes"
+representing simple USB devices.
-Required properties:
+A combined node shall be used instead of a device node and an interface node
+for devices of class 0 or 9 (hub) with a single configuration and a single
+interface.
+
+A "hub node" is a combined node or an interface node that represents a USB
+hub.
+
+
+Required properties for device nodes:
+- compatible: "usbVID,PID", where VID is the vendor id and PID the product id.
+ The textual representation of VID and PID shall be in lower case hexadecimal
+ with leading zeroes suppressed. The other compatible strings from the above
+ standard binding could also be used, but a device adhering to this binding
+ may leave out all except for "usbVID,PID".
+- reg: the number of the USB hub port or the USB host-controller port to which
+ this device is attached. The range is 1-255.
+
+
+Required properties for device nodes with interface nodes:
+- #address-cells: shall be 2
+- #size-cells: shall be 0
+
+
+Required properties for interface nodes:
+- compatible: "usbifVID,PID.configCN.IN", where VID is the vendor id, PID is
+ the product id, CN is the configuration value and IN is the interface
+ number. The textual representation of VID, PID, CN and IN shall be in lower
+ case hexadecimal with leading zeroes suppressed. The other compatible
+ strings from the above standard binding could also be used, but a device
+ adhering to this binding may leave out all except for
+ "usbifVID,PID.configCN.IN".
+- reg: the interface number and configuration value
+
+The configuration component is not included in the textual representation of
+an interface-node unit address for configuration 1.
+
+
+Required properties for combined nodes:
- compatible: "usbVID,PID", where VID is the vendor id and PID the product id.
The textual representation of VID and PID shall be in lower case hexadecimal
with leading zeroes suppressed. The other compatible strings from the above
@@ -31,8 +72,31 @@ Example:
#address-cells = <1>;
#size-cells = <0>;
- hub@1 { /* hub connected to port 1 */
+ hub@1 { /* hub connected to port 1 */
compatible = "usb5e3,608";
reg = <1>;
};
+
+ device@2 { /* device connected to port 2 */
+ compatible = "usb123,4567";
+ reg = <2>;
+ };
+
+ device@3 { /* device connected to port 3 */
+ compatible = "usb123,abcd";
+ reg = <3>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ interface@0 { /* interface 0 of configuration 1 */
+ compatible = "usbif123,abcd.config1.0";
+ reg = <0 1>;
+ };
+
+ interface@0,2 { /* interface 0 of configuration 2 */
+ compatible = "usbif123,abcd.config2.0";
+ reg = <0 2>;
+ };
+ };
};
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index ae6e484a8d7c..e2ea59bbca93 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -7,12 +7,14 @@ Required properties:
- "marvell,armada3700-xhci" for Armada 37xx SoCs
- "marvell,armada-375-xhci" for Armada 375 SoCs
- "marvell,armada-380-xhci" for Armada 38x SoCs
+ - "renesas,xhci-r8a7743" for r8a7743 SoC
- "renesas,xhci-r8a7790" for r8a7790 SoC
- "renesas,xhci-r8a7791" for r8a7791 SoC
- "renesas,xhci-r8a7793" for r8a7793 SoC
- "renesas,xhci-r8a7795" for r8a7795 SoC
- "renesas,xhci-r8a7796" for r8a7796 SoC
- - "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 compatible device
+ - "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 or RZ/G1 compatible
+ device
- "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 compatible device
- "xhci-platform" (deprecated)
@@ -29,6 +31,7 @@ Optional properties:
- usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM
- usb3-lpm-capable: determines if platform is USB3 LPM capable
- quirk-broken-port-ped: set if the controller has broken port disable mechanism
+ - imod-interval-ns: default interrupt moderation interval is 5000ns
Example:
usb@f0931000 {
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 0994bdd82cd3..f776fb804a8c 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -347,6 +347,7 @@ tcg Trusted Computing Group
tcl Toby Churchill Ltd.
technexion TechNexion
technologic Technologic Systems
+tempo Tempo Semiconductor
terasic Terasic Inc.
thine THine Electronics, Inc.
ti Texas Instruments
diff --git a/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt b/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt
new file mode 100644
index 000000000000..3de96186e92e
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt
@@ -0,0 +1,39 @@
+Zodiac Inflight Innovations RAVE Supervisory Processor Watchdog Bindings
+
+RAVE SP watchdog device is a "MFD cell" device corresponding to
+watchdog functionality of RAVE Supervisory Processor. It is expected
+that its Device Tree node is specified as a child of the node
+corresponding to the parent RAVE SP device (as documented in
+Documentation/devicetree/bindings/mfd/zii,rave-sp.txt)
+
+Required properties:
+
+- compatible: Depending on wire protocol implemented by RAVE SP
+ firmware, should be one of:
+ - "zii,rave-sp-watchdog"
+ - "zii,rave-sp-watchdog-legacy"
+
+Optional properties:
+
+- wdt-timeout: Two byte nvmem cell specified as per
+ Documentation/devicetree/bindings/nvmem/nvmem.txt
+
+Example:
+
+ rave-sp {
+ compatible = "zii,rave-sp-rdu1";
+ current-speed = <38400>;
+
+ eeprom {
+ wdt_timeout: wdt-timeout@8E {
+ reg = <0x8E 2>;
+ };
+ };
+
+ watchdog {
+ compatible = "zii,rave-sp-watchdog";
+ nvmem-cells = <&wdt_timeout>;
+ nvmem-cell-names = "wdt-timeout";
+ };
+ }
+
diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index 0268335414ce..722d4525f7cf 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -112,16 +112,17 @@ Example kernel-doc function comment::
/**
* foobar() - Brief description of foobar.
- * @arg: Description of argument of foobar.
+ * @argument1: Description of parameter argument1 of foobar.
+ * @argument2: Description of parameter argument2 of foobar.
*
* Longer description of foobar.
*
* Return: Description of return value of foobar.
*/
- int foobar(int arg)
+ int foobar(int argument1, char *argument2)
The format is similar for documentation for structures, enums, paragraphs,
-etc. See the sections below for details.
+etc. See the sections below for specific details of each type.
The kernel-doc structure is extracted from the comments, and proper `Sphinx C
Domain`_ function and type descriptions with anchors are generated for them. The
@@ -130,6 +131,226 @@ cross-references. See below for details.
.. _Sphinx C Domain: http://www.sphinx-doc.org/en/stable/domains.html
+
+Parameters and member arguments
+-------------------------------
+
+The kernel-doc function comments describe each parameter to the function and
+function typedefs or each member of struct/union, in order, with the
+``@argument:`` descriptions. For each non-private member argument, one
+``@argument`` definition is needed.
+
+The ``@argument:`` descriptions begin on the very next line following
+the opening brief function description line, with no intervening blank
+comment lines.
+
+The ``@argument:`` descriptions may span multiple lines.
+
+.. note::
+
+ If the ``@argument`` description has multiple lines, the continuation
+ of the description should be starting exactly at the same column as
+ the previous line, e. g.::
+
+ * @argument: some long description
+ * that continues on next lines
+
+ or::
+
+ * @argument:
+ * some long description
+ * that continues on next lines
+
+If a function or typedef parameter argument is ``...`` (e. g. a variable
+number of arguments), its description should be listed in kernel-doc
+notation as::
+
+ * @...: description
+
+Private members
+~~~~~~~~~~~~~~~
+
+Inside a struct or union description, you can use the ``private:`` and
+``public:`` comment tags. Structure fields that are inside a ``private:``
+area are not listed in the generated output documentation.
+
+The ``private:`` and ``public:`` tags must begin immediately following a
+``/*`` comment marker. They may optionally include comments between the
+``:`` and the ending ``*/`` marker.
+
+Example::
+
+ /**
+ * struct my_struct - short description
+ * @a: first member
+ * @b: second member
+ * @d: fourth member
+ *
+ * Longer description
+ */
+ struct my_struct {
+ int a;
+ int b;
+ /* private: internal use only */
+ int c;
+ /* public: the next one is public */
+ int d;
+ };
+
+Function documentation
+----------------------
+
+The general format of a function and function-like macro kernel-doc comment is::
+
+ /**
+ * function_name() - Brief description of function.
+ * @arg1: Describe the first argument.
+ * @arg2: Describe the second argument.
+ * One can provide multiple line descriptions
+ * for arguments.
+ *
+ * A longer description, with more discussion of the function function_name()
+ * that might be useful to those using or modifying it. Begins with an
+ * empty comment line, and may include additional embedded empty
+ * comment lines.
+ *
+ * The longer description may have multiple paragraphs.
+ *
+ * Return: Describe the return value of foobar.
+ *
+ * The return value description can also have multiple paragraphs, and should
+ * be placed at the end of the comment block.
+ */
+
+The brief description following the function name may span multiple lines, and
+ends with an argument description, a blank comment line, or the end of the
+comment block.
+
+Return values
+~~~~~~~~~~~~~
+
+The return value, if any, should be described in a dedicated section
+named ``Return``.
+
+.. note::
+
+ #) The multi-line descriptive text you provide does *not* recognize
+ line breaks, so if you try to format some text nicely, as in::
+
+ * Return:
+ * 0 - OK
+ * -EINVAL - invalid argument
+ * -ENOMEM - out of memory
+
+ this will all run together and produce::
+
+ Return: 0 - OK -EINVAL - invalid argument -ENOMEM - out of memory
+
+ So, in order to produce the desired line breaks, you need to use a
+ ReST list, e. g.::
+
+ * Return:
+ * * 0 - OK to runtime suspend the device
+ * * -EBUSY - Device should not be runtime suspended
+
+ #) If the descriptive text you provide has lines that begin with
+ some phrase followed by a colon, each of those phrases will be taken
+ as a new section heading, with probably won't produce the desired
+ effect.
+
+Structure, union, and enumeration documentation
+-----------------------------------------------
+
+The general format of a struct, union, and enum kernel-doc comment is::
+
+ /**
+ * struct struct_name - Brief description.
+ * @argument: Description of member member_name.
+ *
+ * Description of the structure.
+ */
+
+On the above, ``struct`` is used to mean structs. You can also use ``union``
+and ``enum`` to describe unions and enums. ``argument`` is used
+to mean struct and union member names as well as enumerations in an enum.
+
+The brief description following the structure name may span multiple lines, and
+ends with a member description, a blank comment line, or the end of the
+comment block.
+
+The kernel-doc data structure comments describe each member of the structure,
+in order, with the member descriptions.
+
+Nested structs/unions
+~~~~~~~~~~~~~~~~~~~~~
+
+It is possible to document nested structs unions, like::
+
+ /**
+ * struct nested_foobar - a struct with nested unions and structs
+ * @arg1: - first argument of anonymous union/anonymous struct
+ * @arg2: - second argument of anonymous union/anonymous struct
+ * @arg3: - third argument of anonymous union/anonymous struct
+ * @arg4: - fourth argument of anonymous union/anonymous struct
+ * @bar.st1.arg1 - first argument of struct st1 on union bar
+ * @bar.st1.arg2 - second argument of struct st1 on union bar
+ * @bar.st2.arg1 - first argument of struct st2 on union bar
+ * @bar.st2.arg2 - second argument of struct st2 on union bar
+ struct nested_foobar {
+ /* Anonymous union/struct*/
+ union {
+ struct {
+ int arg1;
+ int arg2;
+ }
+ struct {
+ void *arg3;
+ int arg4;
+ }
+ }
+ union {
+ struct {
+ int arg1;
+ int arg2;
+ } st1;
+ struct {
+ void *arg1;
+ int arg2;
+ } st2;
+ } bar;
+ };
+
+.. note::
+
+ #) When documenting nested structs or unions, if the struct/union ``foo``
+ is named, the argument ``bar`` inside it should be documented as
+ ``@foo.bar:``
+ #) When the nested struct/union is anonymous, the argument ``bar`` on it
+ should be documented as ``@bar:``
+
+Typedef documentation
+---------------------
+
+The general format of a typedef kernel-doc comment is::
+
+ /**
+ * typedef type_name - Brief description.
+ *
+ * Description of the type.
+ */
+
+Typedefs with function prototypes can also be documented::
+
+ /**
+ * typedef type_name - Brief description.
+ * @arg1: description of arg1
+ * @arg2: description of arg2
+ *
+ * Description of the type.
+ */
+ typedef void (*type_name)(struct v4l2_ctrl *arg1, void *arg2);
+
+
Highlights and cross-references
-------------------------------
@@ -201,70 +422,7 @@ cross-references.
For further details, please refer to the `Sphinx C Domain`_ documentation.
-Function documentation
-----------------------
-
-The general format of a function and function-like macro kernel-doc comment is::
-
- /**
- * function_name() - Brief description of function.
- * @arg1: Describe the first argument.
- * @arg2: Describe the second argument.
- * One can provide multiple line descriptions
- * for arguments.
- *
- * A longer description, with more discussion of the function function_name()
- * that might be useful to those using or modifying it. Begins with an
- * empty comment line, and may include additional embedded empty
- * comment lines.
- *
- * The longer description may have multiple paragraphs.
- *
- * Return: Describe the return value of foobar.
- *
- * The return value description can also have multiple paragraphs, and should
- * be placed at the end of the comment block.
- */
-
-The brief description following the function name may span multiple lines, and
-ends with an ``@argument:`` description, a blank comment line, or the end of the
-comment block.
-
-The kernel-doc function comments describe each parameter to the function, in
-order, with the ``@argument:`` descriptions. The ``@argument:`` descriptions
-must begin on the very next line following the opening brief function
-description line, with no intervening blank comment lines. The ``@argument:``
-descriptions may span multiple lines. The continuation lines may contain
-indentation. If a function parameter is ``...`` (varargs), it should be listed
-in kernel-doc notation as: ``@...:``.
-
-The return value, if any, should be described in a dedicated section at the end
-of the comment starting with "Return:".
-
-Structure, union, and enumeration documentation
------------------------------------------------
-
-The general format of a struct, union, and enum kernel-doc comment is::
-
- /**
- * struct struct_name - Brief description.
- * @member_name: Description of member member_name.
- *
- * Description of the structure.
- */
-
-Below, "struct" is used to mean structs, unions and enums, and "member" is used
-to mean struct and union members as well as enumerations in an enum.
-
-The brief description following the structure name may span multiple lines, and
-ends with a ``@member:`` description, a blank comment line, or the end of the
-comment block.
-The kernel-doc data structure comments describe each member of the structure, in
-order, with the ``@member:`` descriptions. The ``@member:`` descriptions must
-begin on the very next line following the opening brief function description
-line, with no intervening blank comment lines. The ``@member:`` descriptions may
-span multiple lines. The continuation lines may contain indentation.
In-line member documentation comments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -294,42 +452,6 @@ on a line of their own, like all other kernel-doc comments::
int foobar;
}
-Private members
-~~~~~~~~~~~~~~~
-
-Inside a struct description, you can use the "private:" and "public:" comment
-tags. Structure fields that are inside a "private:" area are not listed in the
-generated output documentation. The "private:" and "public:" tags must begin
-immediately following a ``/*`` comment marker. They may optionally include
-comments between the ``:`` and the ending ``*/`` marker.
-
-Example::
-
- /**
- * struct my_struct - short description
- * @a: first member
- * @b: second member
- *
- * Longer description
- */
- struct my_struct {
- int a;
- int b;
- /* private: internal use only */
- int c;
- };
-
-
-Typedef documentation
----------------------
-
-The general format of a typedef kernel-doc comment is::
-
- /**
- * typedef type_name - Brief description.
- *
- * Description of the type.
- */
Overview documentation comments
-------------------------------
@@ -376,3 +498,37 @@ file.
Data structures visible in kernel include files should also be documented using
kernel-doc formatted comments.
+
+How to use kernel-doc to generate man pages
+-------------------------------------------
+
+If you just want to use kernel-doc to generate man pages you can do this
+from the Kernel git tree::
+
+ $ scripts/kernel-doc -man $(git grep -l '/\*\*' |grep -v Documentation/) | ./split-man.pl /tmp/man
+
+Using the small ``split-man.pl`` script below::
+
+
+ #!/usr/bin/perl
+
+ if ($#ARGV < 0) {
+ die "where do I put the results?\n";
+ }
+
+ mkdir $ARGV[0],0777;
+ $state = 0;
+ while (<STDIN>) {
+ if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
+ if ($state == 1) { close OUT }
+ $state = 1;
+ $fn = "$ARGV[0]/$1.9";
+ print STDERR "Creating $fn\n";
+ open OUT, ">$fn" or die "can't open $fn: $!\n";
+ print OUT $_;
+ } elsif ($state != 0) {
+ print OUT $_;
+ }
+ }
+
+ close OUT;
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index 73fa7d42bbba..826e85d50a16 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -13,12 +13,6 @@ Driver device table
.. kernel-doc:: include/linux/mod_devicetable.h
:internal:
-Atomic and pointer manipulation
--------------------------------
-
-.. kernel-doc:: arch/x86/include/asm/atomic.h
- :internal:
-
Delaying, scheduling, and timer routines
----------------------------------------
@@ -85,6 +79,21 @@ Internal Functions
.. kernel-doc:: kernel/kthread.c
:export:
+Reference counting
+------------------
+
+.. kernel-doc:: include/linux/refcount.h
+ :internal:
+
+.. kernel-doc:: lib/refcount.c
+ :export:
+
+Atomics
+-------
+
+.. kernel-doc:: arch/x86/include/asm/atomic.h
+ :internal:
+
Kernel objects manipulation
---------------------------
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 814acb4d2294..dfc4486b5743 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -111,40 +111,36 @@ The first thing you need to do in your driver is to allocate this
structure. Any of the usual memory allocators will do, but you'll also
need to initialize a few fields in there:
-- channels: should be initialized as a list using the
+- ``channels``: should be initialized as a list using the
INIT_LIST_HEAD macro for example
-- src_addr_widths:
+- ``src_addr_widths``:
should contain a bitmask of the supported source transfer width
-- dst_addr_widths:
+- ``dst_addr_widths``:
should contain a bitmask of the supported destination transfer width
-- directions:
+- ``directions``:
should contain a bitmask of the supported slave directions
(i.e. excluding mem2mem transfers)
-- residue_granularity:
+- ``residue_granularity``:
+ granularity of the transfer residue reported to dma_set_residue.
+ This can be either:
- - Granularity of the transfer residue reported to dma_set_residue.
- This can be either:
+ - Descriptor:
+ your device doesn't support any kind of residue
+ reporting. The framework will only know that a particular
+ transaction descriptor is done.
- - Descriptor
+ - Segment:
+ your device is able to report which chunks have been transferred
- - Your device doesn't support any kind of residue
- reporting. The framework will only know that a particular
- transaction descriptor is done.
+ - Burst:
+ your device is able to report which burst have been transferred
- - Segment
-
- - Your device is able to report which chunks have been transferred
-
- - Burst
-
- - Your device is able to report which burst have been transferred
-
- - dev: should hold the pointer to the ``struct device`` associated
- to your current driver instance.
+- ``dev``: should hold the pointer to the ``struct device`` associated
+ to your current driver instance.
Supported transaction types
---------------------------
diff --git a/Documentation/driver-api/iio/hw-consumer.rst b/Documentation/driver-api/iio/hw-consumer.rst
new file mode 100644
index 000000000000..8facce6a6733
--- /dev/null
+++ b/Documentation/driver-api/iio/hw-consumer.rst
@@ -0,0 +1,51 @@
+===========
+HW consumer
+===========
+An IIO device can be directly connected to another device in hardware. in this
+case the buffers between IIO provider and IIO consumer are handled by hardware.
+The Industrial I/O HW consumer offers a way to bond these IIO devices without
+software buffer for data. The implementation can be found under
+:file:`drivers/iio/buffer/hw-consumer.c`
+
+
+* struct :c:type:`iio_hw_consumer` — Hardware consumer structure
+* :c:func:`iio_hw_consumer_alloc` — Allocate IIO hardware consumer
+* :c:func:`iio_hw_consumer_free` — Free IIO hardware consumer
+* :c:func:`iio_hw_consumer_enable` — Enable IIO hardware consumer
+* :c:func:`iio_hw_consumer_disable` — Disable IIO hardware consumer
+
+
+HW consumer setup
+=================
+
+As standard IIO device the implementation is based on IIO provider/consumer.
+A typical IIO HW consumer setup looks like this::
+
+ static struct iio_hw_consumer *hwc;
+
+ static const struct iio_info adc_info = {
+ .read_raw = adc_read_raw,
+ };
+
+ static int adc_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan, int *val,
+ int *val2, long mask)
+ {
+ ret = iio_hw_consumer_enable(hwc);
+
+ /* Acquire data */
+
+ ret = iio_hw_consumer_disable(hwc);
+ }
+
+ static int adc_probe(struct platform_device *pdev)
+ {
+ hwc = devm_iio_hw_consumer_alloc(&iio->dev);
+ }
+
+More details
+============
+.. kernel-doc:: include/linux/iio/hw-consumer.h
+.. kernel-doc:: drivers/iio/buffer/industrialio-hw-consumer.c
+ :export:
+
diff --git a/Documentation/driver-api/iio/index.rst b/Documentation/driver-api/iio/index.rst
index e5c3922d1b6f..7fba341bd8b2 100644
--- a/Documentation/driver-api/iio/index.rst
+++ b/Documentation/driver-api/iio/index.rst
@@ -15,3 +15,4 @@ Contents:
buffers
triggers
triggered-buffers
+ hw-consumer
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 53c1b0b06da5..1128705a5731 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -777,17 +777,51 @@ The driver can indicate that by setting ``DPM_FLAG_SMART_SUSPEND`` in
runtime suspend at the beginning of the ``suspend_late`` phase of system-wide
suspend (or in the ``poweroff_late`` phase of hibernation), when runtime PM
has been disabled for it, under the assumption that its state should not change
-after that point until the system-wide transition is over. If that happens, the
-driver's system-wide resume callbacks, if present, may still be invoked during
-the subsequent system-wide resume transition and the device's runtime power
-management status may be set to "active" before enabling runtime PM for it,
-so the driver must be prepared to cope with the invocation of its system-wide
-resume callbacks back-to-back with its ``->runtime_suspend`` one (without the
-intervening ``->runtime_resume`` and so on) and the final state of the device
-must reflect the "active" status for runtime PM in that case.
+after that point until the system-wide transition is over (the PM core itself
+does that for devices whose "noirq", "late" and "early" system-wide PM callbacks
+are executed directly by it). If that happens, the driver's system-wide resume
+callbacks, if present, may still be invoked during the subsequent system-wide
+resume transition and the device's runtime power management status may be set
+to "active" before enabling runtime PM for it, so the driver must be prepared to
+cope with the invocation of its system-wide resume callbacks back-to-back with
+its ``->runtime_suspend`` one (without the intervening ``->runtime_resume`` and
+so on) and the final state of the device must reflect the "active" runtime PM
+status in that case.
During system-wide resume from a sleep state it's easiest to put devices into
the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`.
-Refer to that document for more information regarding this particular issue as
+[Refer to that document for more information regarding this particular issue as
well as for information on the device runtime power management framework in
-general.
+general.]
+
+However, it often is desirable to leave devices in suspend after system
+transitions to the working state, especially if those devices had been in
+runtime suspend before the preceding system-wide suspend (or analogous)
+transition. Device drivers can use the ``DPM_FLAG_LEAVE_SUSPENDED`` flag to
+indicate to the PM core (and middle-layer code) that they prefer the specific
+devices handled by them to be left suspended and they have no problems with
+skipping their system-wide resume callbacks for this reason. Whether or not the
+devices will actually be left in suspend may depend on their state before the
+given system suspend-resume cycle and on the type of the system transition under
+way. In particular, devices are not left suspended if that transition is a
+restore from hibernation, as device states are not guaranteed to be reflected
+by the information stored in the hibernation image in that case.
+
+The middle-layer code involved in the handling of the device is expected to
+indicate to the PM core if the device may be left in suspend by setting its
+:c:member:`power.may_skip_resume` status bit which is checked by the PM core
+during the "noirq" phase of the preceding system-wide suspend (or analogous)
+transition. The middle layer is then responsible for handling the device as
+appropriate in its "noirq" resume callback, which is executed regardless of
+whether or not the device is left suspended, but the other resume callbacks
+(except for ``->complete``) will be skipped automatically by the PM core if the
+device really can be left in suspend.
+
+For devices whose "noirq", "late" and "early" driver callbacks are invoked
+directly by the PM core, all of the system-wide resume callbacks are skipped if
+``DPM_FLAG_LEAVE_SUSPENDED`` is set and the device is in runtime suspend during
+the ``suspend_noirq`` (or analogous) phase or the transition under way is a
+proper system suspend (rather than anything related to hibernation) and the
+device's wakeup settings are suitable for runtime PM (that is, it cannot
+generate wakeup signals at all or it is allowed to wake up the system from
+sleep).
diff --git a/Documentation/driver-api/scsi.rst b/Documentation/driver-api/scsi.rst
index 9ae03171daca..3ae337929721 100644
--- a/Documentation/driver-api/scsi.rst
+++ b/Documentation/driver-api/scsi.rst
@@ -224,6 +224,14 @@ mid to lowlevel SCSI driver interface
.. kernel-doc:: drivers/scsi/hosts.c
:export:
+drivers/scsi/scsi_common.c
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+general support functions
+
+.. kernel-doc:: drivers/scsi/scsi_common.c
+ :export:
+
Transport classes
-----------------
@@ -332,5 +340,5 @@ todo
~~~~
Parallel (fast/wide/ultra) SCSI, USB, SATA, SAS, Fibre Channel,
-FireWire, ATAPI devices, Infiniband, I20, iSCSI, Parallel ports,
+FireWire, ATAPI devices, Infiniband, I2O, iSCSI, Parallel ports,
netlink...
diff --git a/Documentation/driver-api/usb/usb3-debug-port.rst b/Documentation/driver-api/usb/usb3-debug-port.rst
index feb1a36a65b7..b9fd131f4723 100644
--- a/Documentation/driver-api/usb/usb3-debug-port.rst
+++ b/Documentation/driver-api/usb/usb3-debug-port.rst
@@ -98,3 +98,55 @@ you to check the sanity of the setup.
cat /dev/ttyUSB0
done
===== end of bash scripts ===============
+
+Serial TTY
+==========
+
+The DbC support has been added to the xHCI driver. You can get a
+debug device provided by the DbC at runtime.
+
+In order to use this, you need to make sure your kernel has been
+configured to support USB_XHCI_DBGCAP. A sysfs attribute under
+the xHCI device node is used to enable or disable DbC. By default,
+DbC is disabled::
+
+ root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc
+ disabled
+
+Enable DbC with the following command::
+
+ root@target:/sys/bus/pci/devices/0000:00:14.0# echo enable > dbc
+
+You can check the DbC state at anytime::
+
+ root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc
+ enabled
+
+Connect the debug target to the debug host with a USB 3.0 super-
+speed A-to-A debugging cable. You can see /dev/ttyDBC0 created
+on the debug target. You will see below kernel message lines::
+
+ root@target: tail -f /var/log/kern.log
+ [ 182.730103] xhci_hcd 0000:00:14.0: DbC connected
+ [ 191.169420] xhci_hcd 0000:00:14.0: DbC configured
+ [ 191.169597] xhci_hcd 0000:00:14.0: DbC now attached to /dev/ttyDBC0
+
+Accordingly, the DbC state has been brought up to::
+
+ root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc
+ configured
+
+On the debug host, you will see the debug device has been enumerated.
+You will see below kernel message lines::
+
+ root@host: tail -f /var/log/kern.log
+ [ 79.454780] usb 2-2.1: new SuperSpeed USB device number 3 using xhci_hcd
+ [ 79.475003] usb 2-2.1: LPM exit latency is zeroed, disabling LPM.
+ [ 79.475389] usb 2-2.1: New USB device found, idVendor=1d6b, idProduct=0010
+ [ 79.475390] usb 2-2.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
+ [ 79.475391] usb 2-2.1: Product: Linux USB Debug Target
+ [ 79.475392] usb 2-2.1: Manufacturer: Linux Foundation
+ [ 79.475393] usb 2-2.1: SerialNumber: 0001
+
+The debug device works now. You can use any communication or debugging
+program to talk between the host and the target.
diff --git a/Documentation/driver-api/usb/writing_usb_driver.rst b/Documentation/driver-api/usb/writing_usb_driver.rst
index 69f077dcdb78..4fe1c06b6a13 100644
--- a/Documentation/driver-api/usb/writing_usb_driver.rst
+++ b/Documentation/driver-api/usb/writing_usb_driver.rst
@@ -321,6 +321,6 @@ linux-usb-devel Mailing List Archives:
http://marc.theaimsgroup.com/?l=linux-usb-devel
Programming Guide for Linux USB Device Drivers:
-http://usb.cs.tum.edu/usbdoc
+http://lmu.web.psi.ch/docu/manuals/software_manuals/linux_sl/usb_linux_programming_guide.pdf
USB Home Page: http://www.usb.org
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index c180045eb43b..7c1bb3d0c222 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -384,6 +384,9 @@ RESET
devm_reset_control_get()
devm_reset_controller_register()
+SERDEV
+ devm_serdev_device_open()
+
SLAVE DMA ENGINE
devm_acpi_dma_controller_register()
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 918972babcd8..de1dc35fe500 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -1,7 +1,7 @@
Fault injection capabilities infrastructure
===========================================
-See also drivers/md/faulty.c and "every_nth" module option for scsi_debug.
+See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug.
Available fault injection capabilities
@@ -30,6 +30,12 @@ o fail_mmc_request
injects MMC data errors on devices permitted by setting
debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+o fail_function
+
+ injects error return on specific functions, which are marked by
+ ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+ under /sys/kernel/debug/fail_function. No boot option supported.
+
Configure fault-injection capabilities behavior
-----------------------------------------------
@@ -123,6 +129,29 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
+- /sys/kernel/debug/fail_function/inject:
+
+ Format: { 'function-name' | '!function-name' | '' }
+ specifies the target function of error injection by name.
+ If the function name leads '!' prefix, given function is
+ removed from injection list. If nothing specified ('')
+ injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+ (read only) shows error injectable functions and what type of
+ error values can be specified. The error type will be one of
+ below;
+ - NULL: retval must be 0.
+ - ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
+ - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+
+ specifies the "error" return value to inject to the given
+ function for given function. This will be created when
+ user specifies new injection entry.
+
o Boot option
In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
echo "Injecting errors into the module $module... (interrupt to stop)"
sleep 1000000
+------------------------------------------------------------------------------
+
+o Inject open_ctree error while btrfs mount
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+FAILFUNC=open_ctree
+echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+ echo "SUCCESS!"
+else
+ echo "FAILED!"
+ umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
Tool to run command with failslab or fail_page_alloc
----------------------------------------------------
In order to make it easier to accomplish the tasks mentioned above, we can use
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index f377290fe48e..3406fae833c3 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -35,5 +35,5 @@
| um: | TODO |
| unicore32: | TODO |
| x86: | ok | 64-bit only
- | xtensa: | TODO |
+ | xtensa: | ok |
-----------------------
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index d7acd7bd3619..59a4c9ffb7f3 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -35,5 +35,5 @@
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
- | xtensa: | TODO |
+ | xtensa: | ok |
-----------------------
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index 55755395d3dc..81c0becab225 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -49,12 +49,10 @@ sb=n Use alternate superblock at this location.
user_xattr Enable "user." POSIX Extended Attributes
(requires CONFIG_EXT2_FS_XATTR).
- See also http://acl.bestbits.at
nouser_xattr Don't support "user." extended attributes.
acl Enable POSIX Access Control Lists support
(requires CONFIG_EXT2_FS_POSIX_ACL).
- See also http://acl.bestbits.at
noacl Don't support POSIX ACLs.
nobh Do not attach buffer_heads to file pagecache.
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 75236c0c2ac2..8cd63e16f171 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -202,15 +202,14 @@ inode_readahead_blks=n This tuning parameter controls the maximum
the buffer cache. The default value is 32 blocks.
nouser_xattr Disables Extended User Attributes. See the
- attr(5) manual page and http://acl.bestbits.at/
- for more information about extended attributes.
+ attr(5) manual page for more information about
+ extended attributes.
noacl This option disables POSIX Access Control List
support. If ACL support is enabled in the kernel
configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
enabled by default on mount. See the acl(5) manual
- page and http://acl.bestbits.at/ for more information
- about acl.
+ page for more information about acl.
bsddf (*) Make 'df' act like BSD.
minixdf Make 'df' act like Minix.
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
index 520a4becb75c..63889149f532 100644
--- a/Documentation/filesystems/nfs/Exporting
+++ b/Documentation/filesystems/nfs/Exporting
@@ -56,13 +56,25 @@ a/ A dentry flag DCACHE_DISCONNECTED which is set on
any dentry that might not be part of the proper prefix.
This is set when anonymous dentries are created, and cleared when a
dentry is noticed to be a child of a dentry which is in the proper
- prefix.
-
-b/ A per-superblock list "s_anon" of dentries which are the roots of
- subtrees that are not in the proper prefix. These dentries, as
- well as the proper prefix, need to be released at unmount time. As
- these dentries will not be hashed, they are linked together on the
- d_hash list_head.
+ prefix. If the refcount on a dentry with this flag set
+ becomes zero, the dentry is immediately discarded, rather than being
+ kept in the dcache. If a dentry that is not already in the dcache
+ is repeatedly accessed by filehandle (as NFSD might do), an new dentry
+ will be a allocated for each access, and discarded at the end of
+ the access.
+
+ Note that such a dentry can acquire children, name, ancestors, etc.
+ without losing DCACHE_DISCONNECTED - that flag is only cleared when
+ subtree is successfully reconnected to root. Until then dentries
+ in such subtree are retained only as long as there are references;
+ refcount reaching zero means immediate eviction, same as for unhashed
+ dentries. That guarantees that we won't need to hunt them down upon
+ umount.
+
+b/ A primitive for creation of secondary roots - d_obtain_root(inode).
+ Those do _not_ bear DCACHE_DISCONNECTED. They are placed on the
+ per-superblock list (->s_roots), so they can be located at umount
+ time for eviction purposes.
c/ Helper routines to allocate anonymous dentries, and to help attach
loose directory dentries at lookup time. They are:
@@ -77,7 +89,6 @@ c/ Helper routines to allocate anonymous dentries, and to help attach
(such as an anonymous one created by d_obtain_alias), if appropriate.
It returns NULL when the passed-in dentry is used, following the calling
convention of ->lookup.
-
Filesystem Issues
-----------------
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index c0727dc36271..f2f3f8592a6f 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -25,8 +25,8 @@ available from the following download page. At least "mkfs.nilfs2",
cleaner or garbage collector) are required. Details on the tools are
described in the man pages included in the package.
-Project web page: http://nilfs.sourceforge.net/
-Download page: http://nilfs.sourceforge.net/en/download.html
+Project web page: https://nilfs.sourceforge.io/
+Download page: https://nilfs.sourceforge.io/en/download.html
List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
Caveats
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index cf51360e3a9f..91031298beb1 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -344,4 +344,4 @@ the following:
characters in the final slot are set to Unicode 0xFFFF.
Finally, note that the extended name is stored in Unicode. Each Unicode
-character takes two bytes.
+character takes either two or four bytes, UTF-16LE encoded.
diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt
index a0f61898d493..659bb19f5b3c 100644
--- a/Documentation/gpio/board.txt
+++ b/Documentation/gpio/board.txt
@@ -2,6 +2,7 @@ GPIO Mappings
=============
This document explains how GPIOs can be assigned to given devices and functions.
+
Note that it only applies to the new descriptor-based interface. For a
description of the deprecated integer-based GPIO interface please refer to
gpio-legacy.txt (actually, there is no real mapping possible with the old
@@ -49,7 +50,7 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the
power = gpiod_get(dev, "power", GPIOD_OUT_HIGH);
-The led GPIOs will be active-high, while the power GPIO will be active-low (i.e.
+The led GPIOs will be active high, while the power GPIO will be active low (i.e.
gpiod_is_active_low(power) will be true).
The second parameter of the gpiod_get() functions, the con_id string, has to be
@@ -122,9 +123,14 @@ where
can be NULL, in which case it will match any function.
- idx is the index of the GPIO within the function.
- flags is defined to specify the following properties:
- * GPIOF_ACTIVE_LOW - to configure the GPIO as active-low
- * GPIOF_OPEN_DRAIN - GPIO pin is open drain type.
- * GPIOF_OPEN_SOURCE - GPIO pin is open source type.
+ * GPIO_ACTIVE_HIGH - GPIO line is active high
+ * GPIO_ACTIVE_LOW - GPIO line is active low
+ * GPIO_OPEN_DRAIN - GPIO line is set up as open drain
+ * GPIO_OPEN_SOURCE - GPIO line is set up as open source
+ * GPIO_PERSISTENT - GPIO line is persistent during
+ suspend/resume and maintains its value
+ * GPIO_TRANSITORY - GPIO line is transitory and may loose its
+ electrical state during suspend/resume
In the future, these flags might be extended to support more properties.
diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt
index 63e1bd1d88e3..d53e5b5cfc9c 100644
--- a/Documentation/gpio/consumer.txt
+++ b/Documentation/gpio/consumer.txt
@@ -66,6 +66,15 @@ for the GPIO. Values can be:
* GPIOD_IN to initialize the GPIO as input.
* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0.
* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1.
+* GPIOD_OUT_LOW_OPEN_DRAIN same as GPIOD_OUT_LOW but also enforce the line
+ to be electrically used with open drain.
+* GPIOD_OUT_HIGH_OPEN_DRAIN same as GPIOD_OUT_HIGH but also enforce the line
+ to be electrically used with open drain.
+
+The two last flags are used for use cases where open drain is mandatory, such
+as I2C: if the line is not already configured as open drain in the mappings
+(see board.txt), then open drain will be enforced anyway and a warning will be
+printed that the board configuration needs to be updated to match the use case.
Both functions return either a valid GPIO descriptor, or an error code checkable
with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned
@@ -184,7 +193,7 @@ A driver can also query the current direction of a GPIO:
int gpiod_get_direction(const struct gpio_desc *desc)
-This function will return either GPIOF_DIR_IN or GPIOF_DIR_OUT.
+This function returns 0 for output, 1 for input, or an error code in case of error.
Be aware that there is no default direction for GPIOs. Therefore, **using a GPIO
without setting its direction first is illegal and will result in undefined
@@ -240,59 +249,71 @@ that can't be accessed from hardIRQ handlers, these calls act the same as the
spinlock-safe calls.
-Active-low State and Raw GPIO Values
-------------------------------------
-Device drivers like to manage the logical state of a GPIO, i.e. the value their
-device will actually receive, no matter what lies between it and the GPIO line.
-In some cases, it might make sense to control the actual GPIO line value. The
-following set of calls ignore the active-low property of a GPIO and work on the
-raw line value:
-
- int gpiod_get_raw_value(const struct gpio_desc *desc)
- void gpiod_set_raw_value(struct gpio_desc *desc, int value)
- int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
- void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value)
- int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
-
-The active-low state of a GPIO can also be queried using the following call:
-
- int gpiod_is_active_low(const struct gpio_desc *desc)
-
-Note that these functions should only be used with great moderation ; a driver
-should not have to care about the physical line level.
-
-
-The active-low property
------------------------
-
-As a driver should not have to care about the physical line level, all of the
+The active low and open drain semantics
+---------------------------------------
+As a consumer should not have to care about the physical line level, all of the
gpiod_set_value_xxx() or gpiod_set_array_value_xxx() functions operate with
-the *logical* value. With this they take the active-low property into account.
-This means that they check whether the GPIO is configured to be active-low,
+the *logical* value. With this they take the active low property into account.
+This means that they check whether the GPIO is configured to be active low,
and if so, they manipulate the passed value before the physical line level is
driven.
+The same is applicable for open drain or open source output lines: those do not
+actively drive their output high (open drain) or low (open source), they just
+switch their output to a high impedance value. The consumer should not need to
+care. (For details read about open drain in driver.txt.)
+
With this, all the gpiod_set_(array)_value_xxx() functions interpret the
-parameter "value" as "active" ("1") or "inactive" ("0"). The physical line
+parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line
level will be driven accordingly.
-As an example, if the active-low property for a dedicated GPIO is set, and the
-gpiod_set_(array)_value_xxx() passes "active" ("1"), the physical line level
+As an example, if the active low property for a dedicated GPIO is set, and the
+gpiod_set_(array)_value_xxx() passes "asserted" ("1"), the physical line level
will be driven low.
To summarize:
-Function (example) active-low property physical line
-gpiod_set_raw_value(desc, 0); don't care low
-gpiod_set_raw_value(desc, 1); don't care high
-gpiod_set_value(desc, 0); default (active-high) low
-gpiod_set_value(desc, 1); default (active-high) high
-gpiod_set_value(desc, 0); active-low high
-gpiod_set_value(desc, 1); active-low low
-
-Please note again that the set_raw/get_raw functions should be avoided as much
-as possible, especially by drivers which should not care about the actual
-physical line level and worry about the logical value instead.
+Function (example) line property physical line
+gpiod_set_raw_value(desc, 0); don't care low
+gpiod_set_raw_value(desc, 1); don't care high
+gpiod_set_value(desc, 0); default (active high) low
+gpiod_set_value(desc, 1); default (active high) high
+gpiod_set_value(desc, 0); active low high
+gpiod_set_value(desc, 1); active low low
+gpiod_set_value(desc, 0); default (active high) low
+gpiod_set_value(desc, 1); default (active high) high
+gpiod_set_value(desc, 0); open drain low
+gpiod_set_value(desc, 1); open drain high impedance
+gpiod_set_value(desc, 0); open source high impedance
+gpiod_set_value(desc, 1); open source high
+
+It is possible to override these semantics using the *set_raw/'get_raw functions
+but it should be avoided as much as possible, especially by system-agnostic drivers
+which should not need to care about the actual physical line level and worry about
+the logical value instead.
+
+
+Accessing raw GPIO values
+-------------------------
+Consumers exist that need to manage the logical state of a GPIO line, i.e. the value
+their device will actually receive, no matter what lies between it and the GPIO
+line.
+
+The following set of calls ignore the active-low or open drain property of a GPIO and
+work on the raw line value:
+
+ int gpiod_get_raw_value(const struct gpio_desc *desc)
+ void gpiod_set_raw_value(struct gpio_desc *desc, int value)
+ int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
+ void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value)
+ int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
+
+The active low state of a GPIO can also be queried using the following call:
+
+ int gpiod_is_active_low(const struct gpio_desc *desc)
+
+Note that these functions should only be used with great moderation; a driver
+should not have to care about the physical line level or open drain semantics.
Access multiple GPIOs with a single function call
diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index d8de1c7de85a..3392a0fd4c23 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -88,6 +88,10 @@ ending up in the pin control back-end "behind" the GPIO controller, usually
closer to the actual pins. This way the pin controller can manage the below
listed GPIO configurations.
+If a pin controller back-end is used, the GPIO controller or hardware
+description needs to provide "GPIO ranges" mapping the GPIO line offsets to pin
+numbers on the pin controller so they can properly cross-reference each other.
+
GPIOs with debounce support
---------------------------
diff --git a/Documentation/gpio/sysfs.txt b/Documentation/gpio/sysfs.txt
index aeab01aa4d00..6cdeab8650cd 100644
--- a/Documentation/gpio/sysfs.txt
+++ b/Documentation/gpio/sysfs.txt
@@ -1,6 +1,17 @@
GPIO Sysfs Interface for Userspace
==================================
+THIS ABI IS DEPRECATED, THE ABI DOCUMENTATION HAS BEEN MOVED TO
+Documentation/ABI/obsolete/sysfs-gpio AND NEW USERSPACE CONSUMERS
+ARE SUPPOSED TO USE THE CHARACTER DEVICE ABI. THIS OLD SYSFS ABI WILL
+NOT BE DEVELOPED (NO NEW FEATURES), IT WILL JUST BE MAINTAINED.
+
+Refer to the examples in tools/gpio/* for an introduction to the new
+character device ABI. Also see the userspace header in
+include/uapi/linux/gpio.h
+
+The deprecated sysfs ABI
+------------------------
Platforms which use the "gpiolib" implementors framework may choose to
configure a sysfs user interface to GPIOs. This is different from the
debugfs interface, since it provides control over GPIO direction and
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 2e7ee0313c1c..e94d3ac2bdd0 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -341,10 +341,7 @@ GuC
GuC-specific firmware loader
----------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
- :doc: GuC-specific firmware loader
-
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c
:internal:
GuC-based command submission
diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066
index 3fa6bf820c88..51b32aa203a8 100644
--- a/Documentation/hwmon/lm25066
+++ b/Documentation/hwmon/lm25066
@@ -8,11 +8,6 @@ Supported chips:
Datasheets:
http://www.ti.com/lit/gpn/lm25056
http://www.ti.com/lit/gpn/lm25056a
- * TI LM25063
- Prefix: 'lm25063'
- Addresses scanned: -
- Datasheet:
- To be announced
* National Semiconductor LM25066
Prefix: 'lm25066'
Addresses scanned: -
@@ -42,7 +37,7 @@ Description
-----------
This driver supports hardware monitoring for National Semiconductor / TI LM25056,
-LM25063, LM25066, LM5064, and LM5066/LM5066I Power Management, Monitoring,
+LM25066, LM5064, and LM5066/LM5066I Power Management, Monitoring,
Control, and Protection ICs.
The driver is a client driver to the core PMBus driver. Please see
@@ -74,12 +69,8 @@ in1_input Measured input voltage.
in1_average Average measured input voltage.
in1_min Minimum input voltage.
in1_max Maximum input voltage.
-in1_crit Critical high input voltage (LM25063 only).
-in1_lcrit Critical low input voltage (LM25063 only).
in1_min_alarm Input voltage low alarm.
in1_max_alarm Input voltage high alarm.
-in1_lcrit_alarm Input voltage critical low alarm (LM25063 only).
-in1_crit_alarm Input voltage critical high alarm. (LM25063 only).
in2_label "vmon"
in2_input Measured voltage on VAUX pin
@@ -94,16 +85,12 @@ in3_input Measured output voltage.
in3_average Average measured output voltage.
in3_min Minimum output voltage.
in3_min_alarm Output voltage low alarm.
-in3_highest Historical minimum output voltage (LM25063 only).
-in3_lowest Historical maximum output voltage (LM25063 only).
curr1_label "iin"
curr1_input Measured input current.
curr1_average Average measured input current.
curr1_max Maximum input current.
-curr1_crit Critical input current (LM25063 only).
curr1_max_alarm Input current high alarm.
-curr1_crit_alarm Input current critical high alarm (LM25063 only).
power1_label "pin"
power1_input Measured input power.
@@ -113,11 +100,6 @@ power1_alarm Input power alarm
power1_input_highest Historical maximum power.
power1_reset_history Write any value to reset maximum power history.
-power2_label "pout". LM25063 only.
-power2_input Measured output power.
-power2_max Maximum output power limit.
-power2_crit Critical output power limit.
-
temp1_input Measured temperature.
temp1_max Maximum temperature.
temp1_crit Critical high temperature.
diff --git a/Documentation/hwmon/max31785 b/Documentation/hwmon/max31785
index 45fb6093dec2..270c5f865261 100644
--- a/Documentation/hwmon/max31785
+++ b/Documentation/hwmon/max31785
@@ -17,8 +17,9 @@ management with temperature and remote voltage sensing. Various fan control
features are provided, including PWM frequency control, temperature hysteresis,
dual tachometer measurements, and fan health monitoring.
-For dual rotor fan configuration, the MAX31785 exposes the slowest rotor of the
-two in the fan[1-4]_input attributes.
+For dual-rotor configurations the MAX31785A exposes the second rotor tachometer
+readings in attributes fan[5-8]_input. By contrast the MAX31785 only exposes
+the slowest rotor measurement, and does so in the fan[1-4]_input attributes.
Usage Notes
-----------
@@ -31,7 +32,9 @@ Sysfs attributes
fan[1-4]_alarm Fan alarm.
fan[1-4]_fault Fan fault.
-fan[1-4]_input Fan RPM.
+fan[1-8]_input Fan RPM. On the MAX31785A, inputs 5-8 correspond to the
+ second rotor of fans 1-4
+fan[1-4]_target Fan input target
in[1-6]_crit Critical maximum output voltage
in[1-6]_crit_alarm Output voltage critical high alarm
@@ -44,6 +47,12 @@ in[1-6]_max_alarm Output voltage high alarm
in[1-6]_min Minimum output voltage
in[1-6]_min_alarm Output voltage low alarm
+pwm[1-4] Fan target duty cycle (0..255)
+pwm[1-4]_enable 0: Full-speed
+ 1: Manual PWM control
+ 2: Automatic PWM (tach-feedback RPM fan-control)
+ 3: Automatic closed-loop (temp-feedback fan-control)
+
temp[1-11]_crit Critical high temperature
temp[1-11]_crit_alarm Chip temperature critical high alarm
temp[1-11]_input Measured temperature
diff --git a/Documentation/hwmon/w83773g b/Documentation/hwmon/w83773g
new file mode 100644
index 000000000000..4cc6c0b8257f
--- /dev/null
+++ b/Documentation/hwmon/w83773g
@@ -0,0 +1,33 @@
+Kernel driver w83773g
+====================
+
+Supported chips:
+ * Nuvoton W83773G
+ Prefix: 'w83773g'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: https://www.nuvoton.com/resource-files/W83773G_SG_DatasheetV1_2.pdf
+
+Authors:
+ Lei YU <mine260309@gmail.com>
+
+Description
+-----------
+
+This driver implements support for Nuvoton W83773G temperature sensor
+chip. This chip implements one local and two remote sensors.
+The chip also features offsets for the two remote sensors which get added to
+the input readings. The chip does all the scaling by itself and the driver
+therefore reports true temperatures that don't need any user-space adjustments.
+Temperature is measured in degrees Celsius.
+The chip is wired over I2C/SMBus and specified over a temperature
+range of -40 to +125 degrees Celsius (for local sensor) and -40 to +127
+degrees Celsius (for remote sensors).
+Resolution for both the local and remote channels is 0.125 degree C.
+
+The chip supports only temperature measurement. The driver exports
+the temperature values via the following sysfs files:
+
+temp[1-3]_input
+temp[2-3]_fault
+temp[2-3]_offset
+update_interval
diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface
index 5ff19447ac44..d04e6e4964ee 100644
--- a/Documentation/i2c/dev-interface
+++ b/Documentation/i2c/dev-interface
@@ -17,13 +17,16 @@ i2c-10, ...). All 256 minor device numbers are reserved for i2c.
C example
=========
-So let's say you want to access an i2c adapter from a C program. The
-first thing to do is "#include <linux/i2c-dev.h>". Please note that
-there are two files named "i2c-dev.h" out there, one is distributed
-with the Linux kernel and is meant to be included from kernel
-driver code, the other one is distributed with i2c-tools and is
-meant to be included from user-space programs. You obviously want
-the second one here.
+So let's say you want to access an i2c adapter from a C program.
+First, you need to include these two headers:
+
+ #include <linux/i2c-dev.h>
+ #include <i2c/smbus.h>
+
+(Please note that there are two files named "i2c-dev.h" out there. One is
+distributed with the Linux kernel and the other one is included in the
+source tree of i2c-tools. They used to be different in content but since 2012
+they're identical. You should use "linux/i2c-dev.h").
Now, you have to decide which adapter you want to access. You should
inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index cb7f1ba5b3b1..ef5080cbf009 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -13,6 +13,18 @@ documents into a coherent whole. Please note that improvements to the
documentation are welcome; join the linux-doc list at vger.kernel.org if
you want to help out.
+Licensing documentation
+-----------------------
+
+The following describes the license of the Linux kernel source code
+(GPLv2), how to properly mark the license of individual files in the source
+tree, as well as links to the full license text.
+
+.. toctree::
+ :maxdepth: 2
+
+ process/license-rules.rst
+
User-oriented documentation
---------------------------
@@ -52,6 +64,7 @@ merged much easier.
dev-tools/index
doc-guide/index
kernel-hacking/index
+ maintainer/index
Kernel API documentation
------------------------
diff --git a/Documentation/input/multi-touch-protocol.rst b/Documentation/input/multi-touch-protocol.rst
index 8035868c56bc..b51751a0cd5d 100644
--- a/Documentation/input/multi-touch-protocol.rst
+++ b/Documentation/input/multi-touch-protocol.rst
@@ -269,10 +269,11 @@ ABS_MT_ORIENTATION
The orientation of the touching ellipse. The value should describe a signed
quarter of a revolution clockwise around the touch center. The signed value
range is arbitrary, but zero should be returned for an ellipse aligned with
- the Y axis of the surface, a negative value when the ellipse is turned to
- the left, and a positive value when the ellipse is turned to the
- right. When completely aligned with the X axis, the range max should be
- returned.
+ the Y axis (north) of the surface, a negative value when the ellipse is
+ turned to the left, and a positive value when the ellipse is turned to the
+ right. When aligned with the X axis in the positive direction, the range
+ max should be returned; when aligned with the X axis in the negative
+ direction, the range -max should be returned.
Touch ellipsis are symmetrical by default. For devices capable of true 360
degree orientation, the reported orientation must exceed the range max to
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 262722d8867b..f5b9493f04ad 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -77,6 +77,27 @@ applicable everywhere (see syntax).
Optionally, dependencies only for this default value can be added with
"if".
+ The default value deliberately defaults to 'n' in order to avoid bloating the
+ build. With few exceptions, new config options should not change this. The
+ intent is for "make oldconfig" to add as little as possible to the config from
+ release to release.
+
+ Note:
+ Things that merit "default y/m" include:
+
+ a) A new Kconfig option for something that used to always be built
+ should be "default y".
+
+ b) A new gatekeeping Kconfig option that hides/shows other Kconfig
+ options (but does not generate any code of its own), should be
+ "default y" so people will see those other options.
+
+ c) Sub-driver behavior or similar options for a driver that is
+ "default n". This allows you to provide sane defaults.
+
+ d) Hardware or infrastructure that everybody expects, such as CONFIG_NET
+ or CONFIG_BLOCK. These are rare exceptions.
+
- type definition + default value:
"def_bool"/"def_tristate" <expr> ["if" <expr>]
This is a shorthand notation for a type definition plus a value.
@@ -200,10 +221,14 @@ module state. Dependency expressions have the following syntax:
<expr> ::= <symbol> (1)
<symbol> '=' <symbol> (2)
<symbol> '!=' <symbol> (3)
- '(' <expr> ')' (4)
- '!' <expr> (5)
- <expr> '&&' <expr> (6)
- <expr> '||' <expr> (7)
+ <symbol1> '<' <symbol2> (4)
+ <symbol1> '>' <symbol2> (4)
+ <symbol1> '<=' <symbol2> (4)
+ <symbol1> '>=' <symbol2> (4)
+ '(' <expr> ')' (5)
+ '!' <expr> (6)
+ <expr> '&&' <expr> (7)
+ <expr> '||' <expr> (8)
Expressions are listed in decreasing order of precedence.
@@ -214,10 +239,13 @@ Expressions are listed in decreasing order of precedence.
otherwise 'n'.
(3) If the values of both symbols are equal, it returns 'n',
otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+ or greater-or-equal than value of <symbol2>, it returns 'y',
+ otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
respectively for calculations). A menu entry becomes visible when its
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
deleted file mode 100644
index c23e2c5ab80d..000000000000
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ /dev/null
@@ -1,322 +0,0 @@
-NOTE: this document is outdated and will eventually be removed. See
-Documentation/doc-guide/ for current information.
-
-kernel-doc nano-HOWTO
-=====================
-
-How to format kernel-doc comments
----------------------------------
-
-In order to provide embedded, 'C' friendly, easy to maintain,
-but consistent and extractable documentation of the functions and
-data structures in the Linux kernel, the Linux kernel has adopted
-a consistent style for documenting functions and their parameters,
-and structures and their members.
-
-The format for this documentation is called the kernel-doc format.
-It is documented in this Documentation/kernel-doc-nano-HOWTO.txt file.
-
-This style embeds the documentation within the source files, using
-a few simple conventions. The scripts/kernel-doc perl script, the
-Documentation/sphinx/kerneldoc.py Sphinx extension and other tools understand
-these conventions, and are used to extract this embedded documentation
-into various documents.
-
-In order to provide good documentation of kernel functions and data
-structures, please use the following conventions to format your
-kernel-doc comments in Linux kernel source.
-
-We definitely need kernel-doc formatted documentation for functions
-that are exported to loadable modules using EXPORT_SYMBOL.
-
-We also look to provide kernel-doc formatted documentation for
-functions externally visible to other kernel files (not marked
-"static").
-
-We also recommend providing kernel-doc formatted documentation
-for private (file "static") routines, for consistency of kernel
-source code layout. But this is lower priority and at the
-discretion of the MAINTAINER of that kernel source file.
-
-Data structures visible in kernel include files should also be
-documented using kernel-doc formatted comments.
-
-The opening comment mark "/**" is reserved for kernel-doc comments.
-Only comments so marked will be considered by the kernel-doc scripts,
-and any comment so marked must be in kernel-doc format. Do not use
-"/**" to be begin a comment block unless the comment block contains
-kernel-doc formatted comments. The closing comment marker for
-kernel-doc comments can be either "*/" or "**/", but "*/" is
-preferred in the Linux kernel tree.
-
-Kernel-doc comments should be placed just before the function
-or data structure being described.
-
-Example kernel-doc function comment:
-
-/**
- * foobar() - short function description of foobar
- * @arg1: Describe the first argument to foobar.
- * @arg2: Describe the second argument to foobar.
- * One can provide multiple line descriptions
- * for arguments.
- *
- * A longer description, with more discussion of the function foobar()
- * that might be useful to those using or modifying it. Begins with
- * empty comment line, and may include additional embedded empty
- * comment lines.
- *
- * The longer description can have multiple paragraphs.
- *
- * Return: Describe the return value of foobar.
- */
-
-The short description following the subject can span multiple lines
-and ends with an @argument description, an empty line or the end of
-the comment block.
-
-The @argument descriptions must begin on the very next line following
-this opening short function description line, with no intervening
-empty comment lines.
-
-If a function parameter is "..." (varargs), it should be listed in
-kernel-doc notation as:
- * @...: description
-
-The return value, if any, should be described in a dedicated section
-named "Return".
-
-Example kernel-doc data structure comment.
-
-/**
- * struct blah - the basic blah structure
- * @mem1: describe the first member of struct blah
- * @mem2: describe the second member of struct blah,
- * perhaps with more lines and words.
- *
- * Longer description of this structure.
- */
-
-The kernel-doc function comments describe each parameter to the
-function, in order, with the @name lines.
-
-The kernel-doc data structure comments describe each structure member
-in the data structure, with the @name lines.
-
-The longer description formatting is "reflowed", losing your line
-breaks. So presenting carefully formatted lists within these
-descriptions won't work so well; derived documentation will lose
-the formatting.
-
-See the section below "How to add extractable documentation to your
-source files" for more details and notes on how to format kernel-doc
-comments.
-
-Components of the kernel-doc system
------------------------------------
-
-Many places in the source tree have extractable documentation in the
-form of block comments above functions. The components of this system
-are:
-
-- scripts/kernel-doc
-
- This is a perl script that hunts for the block comments and can mark
- them up directly into DocBook, ReST, man, text, and HTML. (No, not
- texinfo.)
-
-- scripts/docproc.c
-
- This is a program for converting SGML template files into SGML
- files. When a file is referenced it is searched for symbols
- exported (EXPORT_SYMBOL), to be able to distinguish between internal
- and external functions.
- It invokes kernel-doc, giving it the list of functions that
- are to be documented.
- Additionally it is used to scan the SGML template files to locate
- all the files referenced herein. This is used to generate dependency
- information as used by make.
-
-- Makefile
-
- The targets 'xmldocs', 'latexdocs', 'pdfdocs', 'epubdocs'and 'htmldocs'
- are used to build XML DocBook files, LaTeX files, PDF files,
- ePub files and html files in Documentation/.
-
-How to extract the documentation
---------------------------------
-
-If you just want to read the ready-made books on the various
-subsystems, just type 'make epubdocs', or 'make pdfdocs', or 'make htmldocs',
-depending on your preference. If you would rather read a different format,
-you can type 'make xmldocs' and then use DocBook tools to convert
-Documentation/output/*.xml to a format of your choice (for example,
-'db2html ...' if 'make htmldocs' was not defined).
-
-If you want to see man pages instead, you can do this:
-
-$ cd linux
-$ scripts/kernel-doc -man $(find -name '*.c') | split-man.pl /tmp/man
-$ scripts/kernel-doc -man $(find -name '*.h') | split-man.pl /tmp/man
-
-Here is split-man.pl:
-
--->
-#!/usr/bin/perl
-
-if ($#ARGV < 0) {
- die "where do I put the results?\n";
-}
-
-mkdir $ARGV[0],0777;
-$state = 0;
-while (<STDIN>) {
- if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
- if ($state == 1) { close OUT }
- $state = 1;
- $fn = "$ARGV[0]/$1.9";
- print STDERR "Creating $fn\n";
- open OUT, ">$fn" or die "can't open $fn: $!\n";
- print OUT $_;
- } elsif ($state != 0) {
- print OUT $_;
- }
-}
-
-close OUT;
-<--
-
-If you just want to view the documentation for one function in one
-file, you can do this:
-
-$ scripts/kernel-doc -man -function fn file | nroff -man | less
-
-or this:
-
-$ scripts/kernel-doc -text -function fn file
-
-
-How to add extractable documentation to your source files
----------------------------------------------------------
-
-The format of the block comment is like this:
-
-/**
- * function_name(:)? (- short description)?
-(* @parameterx(space)*: (description of parameter x)?)*
-(* a blank line)?
- * (Description:)? (Description of function)?
- * (section header: (section description)? )*
-(*)?*/
-
-All "description" text can span multiple lines, although the
-function_name & its short description are traditionally on a single line.
-Description text may also contain blank lines (i.e., lines that contain
-only a "*").
-
-"section header:" names must be unique per function (or struct,
-union, typedef, enum).
-
-Use the section header "Return" for sections describing the return value
-of a function.
-
-Avoid putting a spurious blank line after the function name, or else the
-description will be repeated!
-
-All descriptive text is further processed, scanning for the following special
-patterns, which are highlighted appropriately.
-
-'funcname()' - function
-'$ENVVAR' - environment variable
-'&struct_name' - name of a structure (up to two words including 'struct')
-'@parameter' - name of a parameter
-'%CONST' - name of a constant.
-
-NOTE 1: The multi-line descriptive text you provide does *not* recognize
-line breaks, so if you try to format some text nicely, as in:
-
- Return:
- 0 - cool
- 1 - invalid arg
- 2 - out of memory
-
-this will all run together and produce:
-
- Return: 0 - cool 1 - invalid arg 2 - out of memory
-
-NOTE 2: If the descriptive text you provide has lines that begin with
-some phrase followed by a colon, each of those phrases will be taken as
-a new section heading, which means you should similarly try to avoid text
-like:
-
- Return:
- 0: cool
- 1: invalid arg
- 2: out of memory
-
-every line of which would start a new section. Again, probably not
-what you were after.
-
-Take a look around the source tree for examples.
-
-
-kernel-doc for structs, unions, enums, and typedefs
----------------------------------------------------
-
-Beside functions you can also write documentation for structs, unions,
-enums and typedefs. Instead of the function name you must write the name
-of the declaration; the struct/union/enum/typedef must always precede
-the name. Nesting of declarations is not supported.
-Use the argument mechanism to document members or constants.
-
-Inside a struct description, you can use the "private:" and "public:"
-comment tags. Structure fields that are inside a "private:" area
-are not listed in the generated output documentation. The "private:"
-and "public:" tags must begin immediately following a "/*" comment
-marker. They may optionally include comments between the ":" and the
-ending "*/" marker.
-
-Example:
-
-/**
- * struct my_struct - short description
- * @a: first member
- * @b: second member
- *
- * Longer description
- */
-struct my_struct {
- int a;
- int b;
-/* private: internal use only */
- int c;
-};
-
-
-Including documentation blocks in source files
-----------------------------------------------
-
-To facilitate having source code and comments close together, you can
-include kernel-doc documentation blocks that are free-form comments
-instead of being kernel-doc for functions, structures, unions,
-enums, or typedefs. This could be used for something like a
-theory of operation for a driver or library code, for example.
-
-This is done by using a DOC: section keyword with a section title. E.g.:
-
-/**
- * DOC: Theory of Operation
- *
- * The whizbang foobar is a dilly of a gizmo. It can do whatever you
- * want it to do, at any time. It reads your mind. Here's how it works.
- *
- * foo bar splat
- *
- * The only drawback to this gizmo is that is can sometimes damage
- * hardware, software, or its subject(s).
- */
-
-DOC: sections are used in ReST files.
-
-Tim.
-*/ <twaugh@redhat.com>
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index daf3883b2694..9999c8468293 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -523,7 +523,7 @@ this expression is true, or ``-ERESTARTSYS`` if a signal is received. The
Waking Up Queued Tasks
----------------------
-Call :c:func:`wake_up()` (``include/linux/wait.h``);, which will wake
+Call :c:func:`wake_up()` (``include/linux/wait.h``), which will wake
up every process in the queue. The exception is if one has
``TASK_EXCLUSIVE`` set, in which case the remainder of the queue will
not be woken. There are other variants of this basic function available
@@ -690,8 +690,8 @@ not provide the necessary runtime environment and the include files are
not tested for it. It is still possible, but not recommended. If you
really want to do this, forget about exceptions at least.
-NUMif
------
+#if
+---
It is generally considered cleaner to use macros in header files (or at
the top of .c files) to abstract away functions rather than using \`#if'
diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
index ecdb18104ab0..1ae2de758c08 100644
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@@ -72,8 +72,7 @@ example, they add a NULL pointer or a boundary check, fix a race by adding
a missing memory barrier, or add some locking around a critical section.
Most of these changes are self contained and the function presents itself
the same way to the rest of the system. In this case, the functions might
-be updated independently one by one. (This can be done by setting the
-'immediate' flag in the klp_patch struct.)
+be updated independently one by one.
But there are more complex fixes. For example, a patch might change
ordering of locking in multiple functions at the same time. Or a patch
@@ -125,12 +124,6 @@ safe to patch tasks:
b) Patching CPU-bound user tasks. If the task is highly CPU-bound
then it will get patched the next time it gets interrupted by an
IRQ.
- c) In the future it could be useful for applying patches for
- architectures which don't yet have HAVE_RELIABLE_STACKTRACE. In
- this case you would have to signal most of the tasks on the
- system. However this isn't supported yet because there's
- currently no way to patch kthreads without
- HAVE_RELIABLE_STACKTRACE.
3. For idle "swapper" tasks, since they don't ever exit the kernel, they
instead have a klp_update_patch_state() call in the idle loop which
@@ -138,27 +131,16 @@ safe to patch tasks:
(Note there's not yet such an approach for kthreads.)
-All the above approaches may be skipped by setting the 'immediate' flag
-in the 'klp_patch' struct, which will disable per-task consistency and
-patch all tasks immediately. This can be useful if the patch doesn't
-change any function or data semantics. Note that, even with this flag
-set, it's possible that some tasks may still be running with an old
-version of the function, until that function returns.
+Architectures which don't have HAVE_RELIABLE_STACKTRACE solely rely on
+the second approach. It's highly likely that some tasks may still be
+running with an old version of the function, until that function
+returns. In this case you would have to signal the tasks. This
+especially applies to kthreads. They may not be woken up and would need
+to be forced. See below for more information.
-There's also an 'immediate' flag in the 'klp_func' struct which allows
-you to specify that certain functions in the patch can be applied
-without per-task consistency. This might be useful if you want to patch
-a common function like schedule(), and the function change doesn't need
-consistency but the rest of the patch does.
-
-For architectures which don't have HAVE_RELIABLE_STACKTRACE, the user
-must set patch->immediate which causes all tasks to be patched
-immediately. This option should be used with care, only when the patch
-doesn't change any function or data semantics.
-
-In the future, architectures which don't have HAVE_RELIABLE_STACKTRACE
-may be allowed to use per-task consistency if we can come up with
-another way to patch kthreads.
+Unless we can come up with another way to patch kthreads, architectures
+without HAVE_RELIABLE_STACKTRACE are not considered fully supported by
+the kernel livepatching.
The /sys/kernel/livepatch/<patch>/transition file shows whether a patch
is in transition. Only a single patch (the topmost patch on the stack)
@@ -176,8 +158,31 @@ If a patch is in transition, this file shows 0 to indicate the task is
unpatched and 1 to indicate it's patched. Otherwise, if no patch is in
transition, it shows -1. Any tasks which are blocking the transition
can be signaled with SIGSTOP and SIGCONT to force them to change their
-patched state.
-
+patched state. This may be harmful to the system though.
+/sys/kernel/livepatch/<patch>/signal attribute provides a better alternative.
+Writing 1 to the attribute sends a fake signal to all remaining blocking
+tasks. No proper signal is actually delivered (there is no data in signal
+pending structures). Tasks are interrupted or woken up, and forced to change
+their patched state.
+
+Administrator can also affect a transition through
+/sys/kernel/livepatch/<patch>/force attribute. Writing 1 there clears
+TIF_PATCH_PENDING flag of all tasks and thus forces the tasks to the patched
+state. Important note! The force attribute is intended for cases when the
+transition gets stuck for a long time because of a blocking task. Administrator
+is expected to collect all necessary data (namely stack traces of such blocking
+tasks) and request a clearance from a patch distributor to force the transition.
+Unauthorized usage may cause harm to the system. It depends on the nature of the
+patch, which functions are (un)patched, and which functions the blocking tasks
+are sleeping in (/proc/<pid>/stack may help here). Removal (rmmod) of patch
+modules is permanently disabled when the force feature is used. It cannot be
+guaranteed there is no task sleeping in such module. It implies unbounded
+reference count if a patch module is disabled and enabled in a loop.
+
+Moreover, the usage of force may also affect future applications of live
+patches and cause even more harm to the system. Administrator should first
+consider to simply cancel a transition (see above). If force is used, reboot
+should be planned and no more live patches applied.
3.1 Adding consistency model support to new architectures
---------------------------------------------------------
@@ -216,13 +221,6 @@ few options:
a good backup option for those architectures which don't have
reliable stack traces yet.
-In the meantime, patches for such architectures can bypass the
-consistency model by setting klp_patch.immediate to true. This option
-is perfectly fine for patches which don't change the semantics of the
-patched functions. In practice, this is usable for ~90% of security
-fixes. Use of this option also means the patch can't be unloaded after
-it has been disabled.
-
4. Livepatch module
===================
@@ -278,9 +276,6 @@ into three levels:
only for a particular object ( vmlinux or a kernel module ). Note that
kallsyms allows for searching symbols according to the object name.
- There's also an 'immediate' flag which, when set, patches the
- function immediately, bypassing the consistency model safety checks.
-
+ struct klp_object defines an array of patched functions (struct
klp_func) in the same object. Where the object is either vmlinux
(NULL) or a module name.
@@ -299,9 +294,6 @@ into three levels:
symbols are found. The only exception are symbols from objects
(kernel modules) that have not been loaded yet.
- Setting the 'immediate' flag applies the patch to all tasks
- immediately, bypassing the consistency model safety checks.
-
For more details on how the patch is applied on a per-task basis,
see the "Consistency model" section.
@@ -316,14 +308,12 @@ section "Livepatch life-cycle" below for more details about these
two operations.
Module removal is only safe when there are no users of the underlying
-functions. The immediate consistency model is not able to detect this. The
-code just redirects the functions at the very beginning and it does not
-check if the functions are in use. In other words, it knows when the
-functions get called but it does not know when the functions return.
-Therefore it cannot be decided when the livepatch module can be safely
-removed. This is solved by a hybrid consistency model. When the system is
-transitioned to a new patch state (patched/unpatched) it is guaranteed that
-no task sleeps or runs in the old code.
+functions. This is the reason why the force feature permanently disables
+the removal. The forced tasks entered the functions but we cannot say
+that they returned back. Therefore it cannot be decided when the
+livepatch module can be safely removed. When the system is successfully
+transitioned to a new patch state (patched/unpatched) without being
+forced it is guaranteed that no task sleeps or runs in the old code.
5. Livepatch life-cycle
@@ -337,19 +327,12 @@ First, the patch is applied only when all patched symbols for already
loaded objects are found. The error handling is much easier if this
check is done before particular functions get redirected.
-Second, the immediate consistency model does not guarantee that anyone is not
-sleeping in the new code after the patch is reverted. This means that the new
-code needs to stay around "forever". If the code is there, one could apply it
-again. Therefore it makes sense to separate the operations that might be done
-once and those that need to be repeated when the patch is enabled (applied)
-again.
-
-Third, it might take some time until the entire system is migrated
-when a more complex consistency model is used. The patch revert might
-block the livepatch module removal for too long. Therefore it is useful
-to revert the patch using a separate operation that might be called
-explicitly. But it does not make sense to remove all information
-until the livepatch module is really removed.
+Second, it might take some time until the entire system is migrated with
+the hybrid consistency model being used. The patch revert might block
+the livepatch module removal for too long. Therefore it is useful to
+revert the patch using a separate operation that might be called
+explicitly. But it does not make sense to remove all information until
+the livepatch module is really removed.
5.1. Registration
@@ -435,6 +418,9 @@ Information about the registered patches can be found under
/sys/kernel/livepatch. The patches could be enabled and disabled
by writing there.
+/sys/kernel/livepatch/<patch>/signal and /sys/kernel/livepatch/<patch>/force
+attributes allow administrator to affect a patching operation.
+
See Documentation/ABI/testing/sysfs-kernel-livepatch for more details.
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
index a2ef3a929bf1..6a8df4cd19bf 100644
--- a/Documentation/locking/locktorture.txt
+++ b/Documentation/locking/locktorture.txt
@@ -57,11 +57,6 @@ torture_type Type of lock to torture. By default, only spinlocks will
o "rwsem_lock": read/write down() and up() semaphore pairs.
-torture_runnable Start locktorture at boot time in the case where the
- module is built into the kernel, otherwise wait for
- torture_runnable to be set via sysfs before starting.
- By default it will begin once the module is loaded.
-
** Torture-framework (RCU + locking) **
diff --git a/Documentation/maintainer/conf.py b/Documentation/maintainer/conf.py
new file mode 100644
index 000000000000..81e9eb7a7884
--- /dev/null
+++ b/Documentation/maintainer/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = 'Linux Kernel Development Documentation'
+
+tags.add("subproject")
+
+latex_documents = [
+ ('index', 'maintainer.tex', 'Linux Kernel Development Documentation',
+ 'The kernel development community', 'manual'),
+]
diff --git a/Documentation/maintainer/configure-git.rst b/Documentation/maintainer/configure-git.rst
new file mode 100644
index 000000000000..78bbbb0d2c84
--- /dev/null
+++ b/Documentation/maintainer/configure-git.rst
@@ -0,0 +1,34 @@
+.. _configuregit:
+
+Configure Git
+=============
+
+This chapter describes maintainer level git configuration.
+
+Tagged branches used in :ref:`Documentation/maintainer/pull-requests.rst
+<pullrequests>` should be signed with the developers public GPG key. Signed
+tags can be created by passing the ``-u`` flag to ``git tag``. However,
+since you would *usually* use the same key for the same project, you can
+set it once with
+::
+
+ git config user.signingkey "keyname"
+
+Alternatively, edit your ``.git/config`` or ``~/.gitconfig`` file by hand:
+::
+
+ [user]
+ name = Jane Developer
+ email = jd@domain.org
+ signingkey = jd@domain.org
+
+You may need to tell ``git`` to use ``gpg2``
+::
+
+ [gpg]
+ program = /path/to/gpg2
+
+You may also like to tell ``gpg`` which ``tty`` to use (add to your shell rc file)
+::
+
+ export GPG_TTY=$(tty)
diff --git a/Documentation/maintainer/index.rst b/Documentation/maintainer/index.rst
new file mode 100644
index 000000000000..2a14916930cb
--- /dev/null
+++ b/Documentation/maintainer/index.rst
@@ -0,0 +1,14 @@
+==========================
+Kernel Maintainer Handbook
+==========================
+
+This document is the humble beginning of a manual for kernel maintainers.
+There is a lot yet to go here! Please feel free to propose (and write)
+additions to this manual.
+
+.. toctree::
+ :maxdepth: 2
+
+ configure-git
+ pull-requests
+
diff --git a/Documentation/maintainer/pull-requests.rst b/Documentation/maintainer/pull-requests.rst
new file mode 100644
index 000000000000..a19db3458b56
--- /dev/null
+++ b/Documentation/maintainer/pull-requests.rst
@@ -0,0 +1,178 @@
+.. _pullrequests:
+
+Creating Pull Requests
+======================
+
+This chapter describes how maintainers can create and submit pull requests
+to other maintainers. This is useful for transferring changes from one
+maintainers tree to another maintainers tree.
+
+This document was written by Tobin C. Harding (who at that time, was not an
+experienced maintainer) primarily from comments made by Greg Kroah-Hartman
+and Linus Torvalds on LKML. Suggestions and fixes by Jonathan Corbet and
+Mauro Carvalho Chehab. Misrepresentation was unintentional but inevitable,
+please direct abuse to Tobin C. Harding <me@tobin.cc>.
+
+Original email thread::
+
+ http://lkml.kernel.org/r/20171114110500.GA21175@kroah.com
+
+
+Create Branch
+-------------
+
+To start with you will need to have all the changes you wish to include in
+the pull request on a separate branch. Typically you will base this branch
+off of a branch in the developers tree whom you intend to send the pull
+request to.
+
+In order to create the pull request you must first tag the branch that you
+have just created. It is recommended that you choose a meaningful tag name,
+in a way that you and others can understand, even after some time. A good
+practice is to include in the name an indicator of the sybsystem of origin
+and the target kernel version.
+
+Greg offers the following. A pull request with miscellaneous stuff for
+drivers/char, to be applied at the Kernel version 4.15-rc1 could be named
+as ``char-misc-4.15-rc1``. If such tag would be produced from a branch
+named ``char-misc-next``, you would be using the following command::
+
+ git tag -s char-misc-4.15-rc1 char-misc-next
+
+that will create a signed tag called ``char-misc-4.15-rc1`` based on the
+last commit in the ``char-misc-next`` branch, and sign it with your gpg key
+(see :ref:`Documentation/maintainer/configure_git.rst <configuregit>`).
+
+Linus will only accept pull requests based on a signed tag. Other
+maintainers may differ.
+
+When you run the above command ``git`` will drop you into an editor and ask
+you to describe the tag. In this case, you are describing a pull request,
+so outline what is contained here, why it should be merged, and what, if
+any, testing has been done. All of this information will end up in the tag
+itself, and then in the merge commit that the maintainer makes if/when they
+merge the pull request. So write it up well, as it will be in the kernel
+tree for forever.
+
+As said by Linus::
+
+ Anyway, at least to me, the important part is the *message*. I want
+ to understand what I'm pulling, and why I should pull it. I also
+ want to use that message as the message for the merge, so it should
+ not just make sense to me, but make sense as a historical record
+ too.
+
+ Note that if there is something odd about the pull request, that
+ should very much be in the explanation. If you're touching files
+ that you don't maintain, explain _why_. I will see it in the
+ diffstat anyway, and if you didn't mention it, I'll just be extra
+ suspicious. And when you send me new stuff after the merge window
+ (or even bug-fixes, but ones that look scary), explain not just
+ what they do and why they do it, but explain the _timing_. What
+ happened that this didn't go through the merge window..
+
+ I will take both what you write in the email pull request _and_ in
+ the signed tag, so depending on your workflow, you can either
+ describe your work in the signed tag (which will also automatically
+ make it into the pull request email), or you can make the signed
+ tag just a placeholder with nothing interesting in it, and describe
+ the work later when you actually send me the pull request.
+
+ And yes, I will edit the message. Partly because I tend to do just
+ trivial formatting (the whole indentation and quoting etc), but
+ partly because part of the message may make sense for me at pull
+ time (describing the conflicts and your personal issues for sending
+ it right now), but may not make sense in the context of a merge
+ commit message, so I will try to make it all make sense. I will
+ also fix any speeling mistaeks and bad grammar I notice,
+ particularly for non-native speakers (but also for native ones
+ ;^). But I may miss some, or even add some.
+
+ Linus
+
+Greg gives, as an example pull request::
+
+ Char/Misc patches for 4.15-rc1
+
+ Here is the big char/misc patch set for the 4.15-rc1 merge window.
+ Contained in here is the normal set of new functions added to all
+ of these crazy drivers, as well as the following brand new
+ subsystems:
+ - time_travel_controller: Finally a set of drivers for the
+ latest time travel bus architecture that provides i/o to
+ the CPU before it asked for it, allowing uninterrupted
+ processing
+ - relativity_shifters: due to the affect that the
+ time_travel_controllers have on the overall system, there
+ was a need for a new set of relativity shifter drivers to
+ accommodate the newly formed black holes that would
+ threaten to suck CPUs into them. This subsystem handles
+ this in a way to successfully neutralize the problems.
+ There is a Kconfig option to force these to be enabled
+ when needed, so problems should not occur.
+
+ All of these patches have been successfully tested in the latest
+ linux-next releases, and the original problems that it found have
+ all been resolved (apologies to anyone living near Canberra for the
+ lack of the Kconfig options in the earlier versions of the
+ linux-next tree creations.)
+
+ Signed-off-by: Your-name-here <your_email@domain>
+
+
+The tag message format is just like a git commit id. One line at the top
+for a "summary subject" and be sure to sign-off at the bottom.
+
+Now that you have a local signed tag, you need to push it up to where it
+can be retrieved::
+
+ git push origin char-misc-4.15-rc1
+
+
+Create Pull Request
+-------------------
+
+The last thing to do is create the pull request message. ``git`` handily
+will do this for you with the ``git request-pull`` command, but it needs a
+bit of help determining what you want to pull, and on what to base the pull
+against (to show the correct changes to be pulled and the diffstat). The
+following command(s) will generate a pull request::
+
+ git request-pull master git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git/ char-misc-4.15-rc1
+
+Quoting Greg::
+
+ This is asking git to compare the difference from the
+ 'char-misc-4.15-rc1' tag location, to the head of the 'master'
+ branch (which in my case points to the last location in Linus's
+ tree that I diverged from, usually a -rc release) and to use the
+ git:// protocol to pull from. If you wish to use https://, that
+ can be used here instead as well (but note that some people behind
+ firewalls will have problems with https git pulls).
+
+ If the char-misc-4.15-rc1 tag is not present in the repo that I am
+ asking to be pulled from, git will complain saying it is not there,
+ a handy way to remember to actually push it to a public location.
+
+ The output of 'git request-pull' will contain the location of the
+ git tree and specific tag to pull from, and the full text
+ description of that tag (which is why you need to provide good
+ information in that tag). It will also create a diffstat of the
+ pull request, and a shortlog of the individual commits that the
+ pull request will provide.
+
+Linus responded that he tends to prefer the ``git://`` protocol. Other
+maintainers may have different preferences. Also, note that if you are
+creating pull requests without a signed tag then ``https://`` may be a
+better choice. Please see the original thread for the full discussion.
+
+
+Submit Pull Request
+-------------------
+
+A pull request is submitted in the same way as an ordinary patch. Send as
+inline email to the maintainer and CC LKML and any sub-system specific
+lists if required. Pull requests to Linus typically have a subject line
+something like::
+
+ [GIT PULL] <subsystem> changes for v4.15-rc1
diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/md/raid5-ppl.txt
index 127072b09363..bfa092589e00 100644
--- a/Documentation/md/raid5-ppl.txt
+++ b/Documentation/md/raid5-ppl.txt
@@ -39,6 +39,7 @@ case the behavior is the same as in plain raid5.
PPL is available for md version-1 metadata and external (specifically IMSM)
metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
-Currently, volatile write-back cache should be disabled on all member drives
-when using PPL. Otherwise it cannot guarantee consistency in case of power
-failure.
+There is a limitation of maximum 64 disks in the array for PPL. It allows to
+keep data structures and implementation simple. RAID5 arrays with so many disks
+are not likely due to high risk of multiple disks failure. Such restriction
+should not be a real life limitation.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 479ecec80593..a863009849a3 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -227,17 +227,20 @@ There are some minimal guarantees that may be expected of a CPU:
(*) On any given CPU, dependent memory accesses will be issued in order, with
respect to itself. This means that for:
- Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+ Q = READ_ONCE(P); D = READ_ONCE(*Q);
the CPU will issue the following memory operations:
Q = LOAD P, D = LOAD *Q
- and always in that order. On most systems, smp_read_barrier_depends()
- does nothing, but it is required for DEC Alpha. The READ_ONCE()
- is required to prevent compiler mischief. Please note that you
- should normally use something like rcu_dereference() instead of
- open-coding smp_read_barrier_depends().
+ and always in that order. However, on DEC Alpha, READ_ONCE() also
+ emits a memory-barrier instruction, so that a DEC Alpha CPU will
+ instead issue the following memory operations:
+
+ Q = LOAD P, MEMORY_BARRIER, D = LOAD *Q, MEMORY_BARRIER
+
+ Whether on DEC Alpha or not, the READ_ONCE() also prevents compiler
+ mischief.
(*) Overlapping loads and stores within a particular CPU will appear to be
ordered within that CPU. This means that for:
@@ -1815,7 +1818,7 @@ The Linux kernel has eight basic CPU memory barriers:
GENERAL mb() smp_mb()
WRITE wmb() smp_wmb()
READ rmb() smp_rmb()
- DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends()
+ DATA DEPENDENCY READ_ONCE()
All memory barriers except the data dependency barriers imply a compiler
@@ -2864,7 +2867,10 @@ access depends on a read, not all do, so it may not be relied on.
Other CPUs may also have split caches, but must coordinate between the various
cachelets for normal memory accesses. The semantics of the Alpha removes the
-need for coordination in the absence of memory barriers.
+need for hardware coordination in the absence of memory barriers, which
+permitted Alpha to sport higher CPU clock rates back in the day. However,
+please note that smp_read_barrier_depends() should not be used except in
+Alpha arch-specific code and within the READ_ONCE() macro.
CACHE COHERENCY VS DMA
diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/mtd/spi-nor.txt
index 548d6306ebca..da1fbff5a24c 100644
--- a/Documentation/mtd/spi-nor.txt
+++ b/Documentation/mtd/spi-nor.txt
@@ -60,3 +60,6 @@ The main API is spi_nor_scan(). Before you call the hook, a driver should
initialize the necessary fields for spi_nor{}. Please see
drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
when you want to write a new driver for a SPI NOR controller.
+Another API is spi_nor_restore(), this is used to restore the status of SPI
+flash chip such as addressing mode. Call it whenever detach the driver from
+device or reboot the system.
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 7a79b3587dd3..2b89d91b376f 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -36,8 +36,6 @@ bonding.txt
- Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
bridge.txt
- where to get user space programs for ethernet bridging with Linux.
-can.txt
- - documentation on CAN protocol family.
cdc_mbim.txt
- 3G/LTE USB modem (Mobile Broadband Interface Model)
checksum-offloads.txt
@@ -228,6 +226,8 @@ x25.txt
- general info on X.25 development.
x25-iface.txt
- description of the X.25 Packet Layer to LAPB device interface.
+xfrm_device.txt
+ - description of XFRM offload API
xfrm_proc.txt
- description of the statistics package for XFRM.
xfrm_sync.txt
diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst
index a342b2cc3dc6..245fb6c0ab6f 100644
--- a/Documentation/networking/batman-adv.rst
+++ b/Documentation/networking/batman-adv.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
==========
batman-adv
==========
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
new file mode 100644
index 000000000000..d23c51abf8c6
--- /dev/null
+++ b/Documentation/networking/can.rst
@@ -0,0 +1,1437 @@
+===================================
+SocketCAN - Controller Area Network
+===================================
+
+Overview / What is SocketCAN
+============================
+
+The socketcan package is an implementation of CAN protocols
+(Controller Area Network) for Linux. CAN is a networking technology
+which has widespread use in automation, embedded devices, and
+automotive fields. While there have been other CAN implementations
+for Linux based on character devices, SocketCAN uses the Berkeley
+socket API, the Linux network stack and implements the CAN device
+drivers as network interfaces. The CAN socket API has been designed
+as similar as possible to the TCP/IP protocols to allow programmers,
+familiar with network programming, to easily learn how to use CAN
+sockets.
+
+
+.. _socketcan-motivation:
+
+Motivation / Why Using the Socket API
+=====================================
+
+There have been CAN implementations for Linux before SocketCAN so the
+question arises, why we have started another project. Most existing
+implementations come as a device driver for some CAN hardware, they
+are based on character devices and provide comparatively little
+functionality. Usually, there is only a hardware-specific device
+driver which provides a character device interface to send and
+receive raw CAN frames, directly to/from the controller hardware.
+Queueing of frames and higher-level transport protocols like ISO-TP
+have to be implemented in user space applications. Also, most
+character-device implementations support only one single process to
+open the device at a time, similar to a serial interface. Exchanging
+the CAN controller requires employment of another device driver and
+often the need for adaption of large parts of the application to the
+new driver's API.
+
+SocketCAN was designed to overcome all of these limitations. A new
+protocol family has been implemented which provides a socket interface
+to user space applications and which builds upon the Linux network
+layer, enabling use all of the provided queueing functionality. A device
+driver for CAN controller hardware registers itself with the Linux
+network layer as a network device, so that CAN frames from the
+controller can be passed up to the network layer and on to the CAN
+protocol family module and also vice-versa. Also, the protocol family
+module provides an API for transport protocol modules to register, so
+that any number of transport protocols can be loaded or unloaded
+dynamically. In fact, the can core module alone does not provide any
+protocol and cannot be used without loading at least one additional
+protocol module. Multiple sockets can be opened at the same time,
+on different or the same protocol module and they can listen/send
+frames on different or the same CAN IDs. Several sockets listening on
+the same interface for frames with the same CAN ID are all passed the
+same received matching CAN frames. An application wishing to
+communicate using a specific transport protocol, e.g. ISO-TP, just
+selects that protocol when opening the socket, and then can read and
+write application data byte streams, without having to deal with
+CAN-IDs, frames, etc.
+
+Similar functionality visible from user-space could be provided by a
+character device, too, but this would lead to a technically inelegant
+solution for a couple of reasons:
+
+* **Intricate usage:** Instead of passing a protocol argument to
+ socket(2) and using bind(2) to select a CAN interface and CAN ID, an
+ application would have to do all these operations using ioctl(2)s.
+
+* **Code duplication:** A character device cannot make use of the Linux
+ network queueing code, so all that code would have to be duplicated
+ for CAN networking.
+
+* **Abstraction:** In most existing character-device implementations, the
+ hardware-specific device driver for a CAN controller directly
+ provides the character device for the application to work with.
+ This is at least very unusual in Unix systems for both, char and
+ block devices. For example you don't have a character device for a
+ certain UART of a serial interface, a certain sound chip in your
+ computer, a SCSI or IDE controller providing access to your hard
+ disk or tape streamer device. Instead, you have abstraction layers
+ which provide a unified character or block device interface to the
+ application on the one hand, and a interface for hardware-specific
+ device drivers on the other hand. These abstractions are provided
+ by subsystems like the tty layer, the audio subsystem or the SCSI
+ and IDE subsystems for the devices mentioned above.
+
+ The easiest way to implement a CAN device driver is as a character
+ device without such a (complete) abstraction layer, as is done by most
+ existing drivers. The right way, however, would be to add such a
+ layer with all the functionality like registering for certain CAN
+ IDs, supporting several open file descriptors and (de)multiplexing
+ CAN frames between them, (sophisticated) queueing of CAN frames, and
+ providing an API for device drivers to register with. However, then
+ it would be no more difficult, or may be even easier, to use the
+ networking framework provided by the Linux kernel, and this is what
+ SocketCAN does.
+
+The use of the networking framework of the Linux kernel is just the
+natural and most appropriate way to implement CAN for Linux.
+
+
+.. _socketcan-concept:
+
+SocketCAN Concept
+=================
+
+As described in :ref:`socketcan-motivation` the main goal of SocketCAN is to
+provide a socket interface to user space applications which builds
+upon the Linux network layer. In contrast to the commonly known
+TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
+medium that has no MAC-layer addressing like ethernet. The CAN-identifier
+(can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
+have to be chosen uniquely on the bus. When designing a CAN-ECU
+network the CAN-IDs are mapped to be sent by a specific ECU.
+For this reason a CAN-ID can be treated best as a kind of source address.
+
+
+.. _socketcan-receive-lists:
+
+Receive Lists
+-------------
+
+The network transparent access of multiple applications leads to the
+problem that different applications may be interested in the same
+CAN-IDs from the same CAN network interface. The SocketCAN core
+module - which implements the protocol family CAN - provides several
+high efficient receive lists for this reason. If e.g. a user space
+application opens a CAN RAW socket, the raw protocol module itself
+requests the (range of) CAN-IDs from the SocketCAN core that are
+requested by the user. The subscription and unsubscription of
+CAN-IDs can be done for specific CAN interfaces or for all(!) known
+CAN interfaces with the can_rx_(un)register() functions provided to
+CAN protocol modules by the SocketCAN core (see :ref:`socketcan-core-module`).
+To optimize the CPU usage at runtime the receive lists are split up
+into several specific lists per device that match the requested
+filter complexity for a given use-case.
+
+
+.. _socketcan-local-loopback1:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As known from other networking concepts the data exchanging
+applications may run on the same or different nodes without any
+change (except for the according addressing information):
+
+.. code::
+
+ ___ ___ ___ _______ ___
+ | _ | | _ | | _ | | _ _ | | _ |
+ ||A|| ||B|| ||C|| ||A| |B|| ||C||
+ |___| |___| |___| |_______| |___|
+ | | | | |
+ -----------------(1)- CAN bus -(2)---------------
+
+To ensure that application A receives the same information in the
+example (2) as it would receive in example (1) there is need for
+some kind of local loopback of the sent CAN frames on the appropriate
+node.
+
+The Linux network devices (by default) just can handle the
+transmission and reception of media dependent frames. Due to the
+arbitration on the CAN bus the transmission of a low prio CAN-ID
+may be delayed by the reception of a high prio CAN frame. To
+reflect the correct [*]_ traffic on the node the loopback of the sent
+data has to be performed right after a successful transmission. If
+the CAN network interface is not capable of performing the loopback for
+some reason the SocketCAN core can do this task as a fallback solution.
+See :ref:`socketcan-local-loopback1` for details (recommended).
+
+The loopback functionality is enabled by default to reflect standard
+networking behaviour for CAN applications. Due to some requests from
+the RT-SocketCAN group the loopback optionally may be disabled for each
+separate socket. See sockopts from the CAN RAW sockets in :ref:`socketcan-raw-sockets`.
+
+.. [*] you really like to have this when you're running analyser
+ tools like 'candump' or 'cansniffer' on the (same) node.
+
+
+.. _socketcan-network-problem-notifications:
+
+Network Problem Notifications
+-----------------------------
+
+The use of the CAN bus may lead to several problems on the physical
+and media access control layer. Detecting and logging of these lower
+layer problems is a vital requirement for CAN users to identify
+hardware issues on the physical transceiver layer as well as
+arbitration problems and error frames caused by the different
+ECUs. The occurrence of detected errors are important for diagnosis
+and have to be logged together with the exact timestamp. For this
+reason the CAN interface driver can generate so called Error Message
+Frames that can optionally be passed to the user application in the
+same way as other CAN frames. Whenever an error on the physical layer
+or the MAC layer is detected (e.g. by the CAN controller) the driver
+creates an appropriate error message frame. Error messages frames can
+be requested by the user application using the common CAN filter
+mechanisms. Inside this filter definition the (interested) type of
+errors may be selected. The reception of error messages is disabled
+by default. The format of the CAN error message frame is briefly
+described in the Linux header file "include/uapi/linux/can/error.h".
+
+
+How to use SocketCAN
+====================
+
+Like TCP/IP, you first need to open a socket for communicating over a
+CAN network. Since SocketCAN implements a new protocol family, you
+need to pass PF_CAN as the first argument to the socket(2) system
+call. Currently, there are two CAN protocols to choose from, the raw
+socket protocol and the broadcast manager (BCM). So to open a socket,
+you would write::
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+and::
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+respectively. After the successful creation of the socket, you would
+normally use the bind(2) system call to bind the socket to a CAN
+interface (which is different from TCP/IP due to different addressing
+- see :ref:`socketcan-concept`). After binding (CAN_RAW) or connecting (CAN_BCM)
+the socket, you can read(2) and write(2) from/to the socket or use
+send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
+on the socket as usual. There are also CAN specific socket options
+described below.
+
+The basic CAN frame structure and the sockaddr structure are defined
+in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
+ __u8 __pad; /* padding */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[8] __attribute__((aligned(8)));
+ };
+
+The alignment of the (linear) payload data[] to a 64bit boundary
+allows the user to define their own structs and unions to easily access
+the CAN payload. There is no given byteorder on the CAN bus by
+default. A read(2) system call on a CAN_RAW socket transfers a
+struct can_frame to the user space.
+
+The sockaddr_can structure has an interface index like the
+PF_PACKET socket, that also binds to a specific interface:
+
+.. code-block:: C
+
+ struct sockaddr_can {
+ sa_family_t can_family;
+ int can_ifindex;
+ union {
+ /* transport protocol class address info (e.g. ISOTP) */
+ struct { canid_t rx_id, tx_id; } tp;
+
+ /* reserved for future CAN protocols address information */
+ } can_addr;
+ };
+
+To determine the interface index an appropriate ioctl() has to
+be used (example for CAN_RAW sockets without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+ strcpy(ifr.ifr_name, "can0" );
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ bind(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+To bind a socket to all(!) CAN interfaces the interface index must
+be 0 (zero). In this case the socket receives CAN frames from every
+enabled CAN interface. To determine the originating CAN interface
+the system call recvfrom(2) may be used instead of read(2). To send
+on a socket that is bound to 'any' interface sendto(2) is needed to
+specify the outgoing interface.
+
+Reading CAN frames from a bound CAN_RAW socket (see above) consists
+of reading a struct can_frame:
+
+.. code-block:: C
+
+ struct can_frame frame;
+
+ nbytes = read(s, &frame, sizeof(struct can_frame));
+
+ if (nbytes < 0) {
+ perror("can raw socket read");
+ return 1;
+ }
+
+ /* paranoid check ... */
+ if (nbytes < sizeof(struct can_frame)) {
+ fprintf(stderr, "read: incomplete CAN frame\n");
+ return 1;
+ }
+
+ /* do something with the received CAN frame */
+
+Writing CAN frames can be done similarly, with the write(2) system call::
+
+ nbytes = write(s, &frame, sizeof(struct can_frame));
+
+When the CAN interface is bound to 'any' existing CAN interface
+(addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
+information about the originating CAN interface is needed:
+
+.. code-block:: C
+
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ socklen_t len = sizeof(addr);
+ struct can_frame frame;
+
+ nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, &len);
+
+ /* get interface name of the received CAN frame */
+ ifr.ifr_ifindex = addr.can_ifindex;
+ ioctl(s, SIOCGIFNAME, &ifr);
+ printf("Received a CAN frame from interface %s", ifr.ifr_name);
+
+To write CAN frames on sockets bound to 'any' CAN interface the
+outgoing interface has to be defined certainly:
+
+.. code-block:: C
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+ addr.can_ifindex = ifr.ifr_ifindex;
+ addr.can_family = AF_CAN;
+
+ nbytes = sendto(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, sizeof(addr));
+
+An accurate timestamp can be obtained with an ioctl(2) call after reading
+a message from the socket:
+
+.. code-block:: C
+
+ struct timeval tv;
+ ioctl(s, SIOCGSTAMP, &tv);
+
+The timestamp has a resolution of one microsecond and is set automatically
+at the reception of a CAN frame.
+
+Remark about CAN FD (flexible data rate) support:
+
+Generally the handling of CAN FD is very similar to the formerly described
+examples. The new CAN FD capable CAN controllers support two different
+bitrates for the arbitration phase and the payload phase of the CAN FD frame
+and up to 64 bytes of payload. This extended payload length breaks all the
+kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
+bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
+the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
+switches the socket into a mode that allows the handling of CAN FD frames
+and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`).
+
+The struct canfd_frame is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct canfd_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 len; /* frame payload length in byte (0 .. 64) */
+ __u8 flags; /* additional flags for CAN FD */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[64] __attribute__((aligned(8)));
+ };
+
+The struct canfd_frame and the existing struct can_frame have the can_id,
+the payload length and the payload data at the same offset inside their
+structures. This allows to handle the different structures very similar.
+When the content of a struct can_frame is copied into a struct canfd_frame
+all structure elements can be used as-is - only the data[] becomes extended.
+
+When introducing the struct canfd_frame it turned out that the data length
+code (DLC) of the struct can_frame was used as a length information as the
+length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
+the easy handling of the length information the canfd_frame.len element
+contains a plain length value from 0 .. 64. So both canfd_frame.len and
+can_frame.can_dlc are equal and contain a length information and no DLC.
+For details about the distinction of CAN and CAN FD capable devices and
+the mapping to the bus-relevant data length code (DLC), see :ref:`socketcan-can-fd-driver`.
+
+The length of the two CAN(FD) frame structures define the maximum transfer
+unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
+definitions are specified for CAN specific MTUs in include/linux/can.h:
+
+.. code-block:: C
+
+ #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
+ #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
+
+
+.. _socketcan-raw-sockets:
+
+RAW Protocol Sockets with can_filters (SOCK_RAW)
+------------------------------------------------
+
+Using CAN_RAW sockets is extensively comparable to the commonly
+known access to CAN character devices. To meet the new possibilities
+provided by the multi user SocketCAN approach, some reasonable
+defaults are set at RAW socket binding time:
+
+- The filters are set to exactly one filter receiving everything
+- The socket only receives valid data frames (=> no error message frames)
+- The loopback of sent CAN frames is enabled (see :ref:`socketcan-local-loopback2`)
+- The socket does not receive its own sent frames (in loopback mode)
+
+These default settings may be changed before or after binding the socket.
+To use the referenced definitions of the socket options for CAN_RAW
+sockets, include <linux/can/raw.h>.
+
+
+.. _socketcan-rawfilter:
+
+RAW socket option CAN_RAW_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The reception of CAN frames using CAN_RAW sockets can be controlled
+by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
+
+The CAN filter structure is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_filter {
+ canid_t can_id;
+ canid_t can_mask;
+ };
+
+A filter matches, when:
+
+.. code-block:: C
+
+ <received_can_id> & mask == can_id & mask
+
+which is analogous to known CAN controllers hardware filter semantics.
+The filter can be inverted in this semantic, when the CAN_INV_FILTER
+bit is set in can_id element of the can_filter structure. In
+contrast to CAN controller hardware filters the user may set 0 .. n
+receive filters for each open socket separately:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+ rfilter[1].can_id = 0x200;
+ rfilter[1].can_mask = 0x700;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+To disable the reception of CAN frames on the selected CAN_RAW socket:
+
+.. code-block:: C
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
+
+To set the filters to zero filters is quite obsolete as to not read
+data causes the raw socket to discard the received CAN frames. But
+having this 'send only' use-case we may remove the receive list in the
+Kernel to save a little (really a very little!) CPU usage.
+
+CAN Filter Usage Optimisation
+.............................
+
+The CAN filters are processed in per-device filter lists at CAN frame
+reception time. To reduce the number of checks that need to be performed
+while walking through the filter lists the CAN core provides an optimized
+filter handling when the filter subscription focusses on a single CAN ID.
+
+For the possible 2048 SFF CAN identifiers the identifier is used as an index
+to access the corresponding subscription list without any further checks.
+For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
+hash function to retrieve the EFF table index.
+
+To benefit from the optimized filters for single CAN identifiers the
+CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
+with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
+can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
+subscribed. E.g. in the example from above:
+
+.. code-block:: C
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+
+both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
+
+To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
+filter has to be defined in this way to benefit from the optimized filters:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
+ rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
+ rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+
+RAW Socket Option CAN_RAW_ERR_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As described in :ref:`socketcan-network-problem-notifications` the CAN interface driver can generate so
+called Error Message Frames that can optionally be passed to the user
+application in the same way as other CAN frames. The possible
+errors are divided into different error classes that may be filtered
+using the appropriate error mask. To register for every possible
+error condition CAN_ERR_MASK can be used as value for the error mask.
+The values for the error mask are defined in linux/can/error.h:
+
+.. code-block:: C
+
+ can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
+ &err_mask, sizeof(err_mask));
+
+
+RAW Socket Option CAN_RAW_LOOPBACK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To meet multi user needs the local loopback is enabled by default
+(see :ref:`socketcan-local-loopback1` for details). But in some embedded use-cases
+(e.g. when only one application uses the CAN bus) this loopback
+functionality can be disabled (separately for each socket):
+
+.. code-block:: C
+
+ int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
+
+
+RAW socket option CAN_RAW_RECV_OWN_MSGS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the local loopback is enabled, all the sent CAN frames are
+looped back to the open CAN sockets that registered for the CAN
+frames' CAN-ID on this given interface to meet the multi user
+needs. The reception of the CAN frames on the same socket that was
+sending the CAN frame is assumed to be unwanted and therefore
+disabled by default. This default behaviour may be changed on
+demand:
+
+.. code-block:: C
+
+ int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+
+.. _socketcan-rawfd:
+
+RAW Socket Option CAN_RAW_FD_FRAMES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+CAN FD support in CAN_RAW sockets can be enabled with a new socket option
+CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
+not supported by the CAN_RAW socket (e.g. on older kernels), switching the
+CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
+
+Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
+and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
+when reading from the socket:
+
+.. code-block:: C
+
+ CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
+ CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
+
+Example:
+
+.. code-block:: C
+
+ [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
+
+ struct canfd_frame cfd;
+
+ nbytes = read(s, &cfd, CANFD_MTU);
+
+ if (nbytes == CANFD_MTU) {
+ printf("got CAN FD frame with length %d\n", cfd.len);
+ /* cfd.flags contains valid data */
+ } else if (nbytes == CAN_MTU) {
+ printf("got legacy CAN frame with length %d\n", cfd.len);
+ /* cfd.flags is undefined */
+ } else {
+ fprintf(stderr, "read: invalid CAN(FD) frame\n");
+ return 1;
+ }
+
+ /* the content can be handled independently from the received MTU size */
+
+ printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
+ for (i = 0; i < cfd.len; i++)
+ printf("%02X ", cfd.data[i]);
+
+When reading with size CANFD_MTU only returns CAN_MTU bytes that have
+been received from the socket a legacy CAN frame has been read into the
+provided CAN FD structure. Note that the canfd_frame.flags data field is
+not specified in the struct can_frame and therefore it is only valid in
+CANFD_MTU sized CAN FD frames.
+
+Implementation hint for new CAN applications:
+
+To build a CAN FD aware application use struct canfd_frame as basic CAN
+data structure for CAN_RAW based applications. When the application is
+executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
+socket option returns an error: No problem. You'll get legacy CAN frames
+or CAN FD frames and can process them the same way.
+
+When sending to CAN devices make sure that the device is capable to handle
+CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+
+
+RAW socket option CAN_RAW_JOIN_FILTERS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN_RAW socket can set multiple CAN identifier specific filters that
+lead to multiple filters in the af_can.c filter processing. These filters
+are indenpendent from each other which leads to logical OR'ed filters when
+applied (see :ref:`socketcan-rawfilter`).
+
+This socket option joines the given CAN filters in the way that only CAN
+frames are passed to user space that matched *all* given CAN filters. The
+semantic for the applied filters is therefore changed to a logical AND.
+
+This is useful especially when the filterset is a combination of filters
+where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+CAN ID ranges from the incoming traffic.
+
+
+RAW Socket Returned Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using recvmsg() call, the msg->msg_flags may contain following flags:
+
+MSG_DONTROUTE:
+ set when the received frame was created on the local host.
+
+MSG_CONFIRM:
+ set when the frame was sent via the socket it is received on.
+ This flag can be interpreted as a 'transmission confirmation' when the
+ CAN driver supports the echo of frames on driver level, see
+ :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`.
+ In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
+
+
+Broadcast Manager Protocol Sockets (SOCK_DGRAM)
+-----------------------------------------------
+
+The Broadcast Manager protocol provides a command based configuration
+interface to filter and send (e.g. cyclic) CAN messages in kernel space.
+
+Receive filters can be used to down sample frequent messages; detect events
+such as message contents changes, packet length changes, and do time-out
+monitoring of received messages.
+
+Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
+created and modified at runtime; both the message content and the two
+possible transmit intervals can be altered.
+
+A BCM socket is not intended for sending individual CAN frames using the
+struct can_frame as known from the CAN_RAW socket. Instead a special BCM
+configuration message is defined. The basic BCM configuration message used
+to communicate with the broadcast manager and the available operations are
+defined in the linux/can/bcm.h include. The BCM message consists of a
+message header with a command ('opcode') followed by zero or more CAN frames.
+The broadcast manager sends responses to user space in the same form:
+
+.. code-block:: C
+
+ struct bcm_msg_head {
+ __u32 opcode; /* command */
+ __u32 flags; /* special flags */
+ __u32 count; /* run 'count' times with ival1 */
+ struct timeval ival1, ival2; /* count and subsequent interval */
+ canid_t can_id; /* unique can_id for task */
+ __u32 nframes; /* number of can_frames following */
+ struct can_frame frames[0];
+ };
+
+The aligned payload 'frames' uses the same basic CAN frame structure defined
+at the beginning of :ref:`socketcan-rawfd` and in the include/linux/can.h include. All
+messages to the broadcast manager from user space have this structure.
+
+Note a CAN_BCM socket must be connected instead of bound after socket
+creation (example without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ connect(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+The broadcast manager socket is able to handle any number of in flight
+transmissions or receive filters concurrently. The different RX/TX jobs are
+distinguished by the unique can_id in each BCM message. However additional
+CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
+When the broadcast manager socket is bound to 'any' CAN interface (=> the
+interface index is set to zero) the configured receive filters apply to any
+CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
+interface index. When using recvfrom() instead of read() to retrieve BCM
+socket messages the originating CAN interface is provided in can_ifindex.
+
+
+Broadcast Manager Operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The opcode defines the operation for the broadcast manager to carry out,
+or details the broadcast managers response to several events, including
+user requests.
+
+Transmit Operations (user space to broadcast manager):
+
+TX_SETUP:
+ Create (cyclic) transmission task.
+
+TX_DELETE:
+ Remove (cyclic) transmission task, requires only can_id.
+
+TX_READ:
+ Read properties of (cyclic) transmission task for can_id.
+
+TX_SEND:
+ Send one CAN frame.
+
+Transmit Responses (broadcast manager to user space):
+
+TX_STATUS:
+ Reply to TX_READ request (transmission task configuration).
+
+TX_EXPIRED:
+ Notification when counter finishes sending at initial interval
+ 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
+
+Receive Operations (user space to broadcast manager):
+
+RX_SETUP:
+ Create RX content filter subscription.
+
+RX_DELETE:
+ Remove RX content filter subscription, requires only can_id.
+
+RX_READ:
+ Read properties of RX content filter subscription for can_id.
+
+Receive Responses (broadcast manager to user space):
+
+RX_STATUS:
+ Reply to RX_READ request (filter task configuration).
+
+RX_TIMEOUT:
+ Cyclic message is detected to be absent (timer ival1 expired).
+
+RX_CHANGED:
+ BCM message with updated CAN frame (detected content change).
+ Sent on first message received or on receipt of revised CAN messages.
+
+
+Broadcast Manager Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When sending a message to the broadcast manager the 'flags' element may
+contain the following flag definitions which influence the behaviour:
+
+SETTIMER:
+ Set the values of ival1, ival2 and count
+
+STARTTIMER:
+ Start the timer with the actual values of ival1, ival2
+ and count. Starting the timer leads simultaneously to emit a CAN frame.
+
+TX_COUNTEVT:
+ Create the message TX_EXPIRED when count expires
+
+TX_ANNOUNCE:
+ A change of data by the process is emitted immediately.
+
+TX_CP_CAN_ID:
+ Copies the can_id from the message header to each
+ subsequent frame in frames. This is intended as usage simplification. For
+ TX tasks the unique can_id from the message header may differ from the
+ can_id(s) stored for transmission in the subsequent struct can_frame(s).
+
+RX_FILTER_ID:
+ Filter by can_id alone, no frames required (nframes=0).
+
+RX_CHECK_DLC:
+ A change of the DLC leads to an RX_CHANGED.
+
+RX_NO_AUTOTIMER:
+ Prevent automatically starting the timeout monitor.
+
+RX_ANNOUNCE_RESUME:
+ If passed at RX_SETUP and a receive timeout occurred, a
+ RX_CHANGED message will be generated when the (cyclic) receive restarts.
+
+TX_RESET_MULTI_IDX:
+ Reset the index for the multiple frame transmission.
+
+RX_RTR_FRAME:
+ Send reply for RTR-request (placed in op->frames[0]).
+
+
+Broadcast Manager Transmission Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Periodic transmission configurations may use up to two interval timers.
+In this case the BCM sends a number of messages ('count') at an interval
+'ival1', then continuing to send at another given interval 'ival2'. When
+only one timer is needed 'count' is set to zero and only 'ival2' is used.
+When SET_TIMER and START_TIMER flag were set the timers are activated.
+The timer values can be altered at runtime when only SET_TIMER is set.
+
+
+Broadcast Manager message sequence transmission
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
+TX task configuration. The number of CAN frames is provided in the 'nframes'
+element of the BCM message head. The defined number of CAN frames are added
+as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* create a struct to set up a sequence of four CAN frames */
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[4];
+ } mytxmsg;
+
+ (..)
+ mytxmsg.msg_head.nframes = 4;
+ (..)
+
+ write(s, &mytxmsg, sizeof(mytxmsg));
+
+With every transmission the index in the array of CAN frames is increased
+and set to zero at index overflow.
+
+
+Broadcast Manager Receive Filter Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
+When the SET_TIMER flag is set the timers are enabled:
+
+ival1:
+ Send RX_TIMEOUT when a received message is not received again within
+ the given time. When START_TIMER is set at RX_SETUP the timeout detection
+ is activated directly - even without a former CAN frame reception.
+
+ival2:
+ Throttle the received message rate down to the value of ival2. This
+ is useful to reduce messages for the application when the signal inside the
+ CAN frame is stateless as state changes within the ival2 periode may get
+ lost.
+
+Broadcast Manager Multiplex Message Receive Filter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To filter for content changes in multiplex message sequences an array of more
+than one CAN frames can be passed in a RX_SETUP configuration message. The
+data bytes of the first CAN frame contain the mask of relevant bits that
+have to match in the subsequent CAN frames with the received CAN frame.
+If one of the subsequent CAN frames is matching the bits in that frame data
+mark the relevant content to be compared with the previous received content.
+Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
+filters) can be added as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* usually used to clear CAN frame data[] - beware of endian problems! */
+ #define U64_DATA(p) (*(unsigned long long*)(p)->data)
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = 0;
+ msg.msg_head.nframes = 5;
+ U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
+ U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
+ U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
+ U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
+ U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
+
+ write(s, &msg, sizeof(msg));
+
+
+Broadcast Manager CAN FD Support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The programming API of the CAN_BCM depends on struct can_frame which is
+given as array directly behind the bcm_msg_head structure. To follow this
+schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
+flags indicates that the concatenated CAN frame structures behind the
+bcm_msg_head are defined as struct canfd_frame:
+
+.. code-block:: C
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct canfd_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = CAN_FD_FRAME;
+ msg.msg_head.nframes = 5;
+ (..)
+
+When using CAN FD frames for multiplex filtering the MUX mask is still
+expected in the first 64 bit of the struct canfd_frame data section.
+
+
+Connected Transport Protocols (SOCK_SEQPACKET)
+----------------------------------------------
+
+(to be written)
+
+
+Unconnected Transport Protocols (SOCK_DGRAM)
+--------------------------------------------
+
+(to be written)
+
+
+.. _socketcan-core-module:
+
+SocketCAN Core Module
+=====================
+
+The SocketCAN core module implements the protocol family
+PF_CAN. CAN protocol modules are loaded by the core module at
+runtime. The core module provides an interface for CAN protocol
+modules to subscribe needed CAN IDs (see :ref:`socketcan-receive-lists`).
+
+
+can.ko Module Params
+--------------------
+
+- **stats_timer**:
+ To calculate the SocketCAN core statistics
+ (e.g. current/maximum frames per second) this 1 second timer is
+ invoked at can.ko module start time by default. This timer can be
+ disabled by using stattimer=0 on the module commandline.
+
+- **debug**:
+ (removed since SocketCAN SVN r546)
+
+
+procfs content
+--------------
+
+As described in :ref:`socketcan-receive-lists` the SocketCAN core uses several filter
+lists to deliver received CAN frames to CAN protocol modules. These
+receive lists, their filters and the count of filter matches can be
+checked in the appropriate receive list. All entries contain the
+device and a protocol module identifier::
+
+ foo@bar:~$ cat /proc/net/can/rcvlist_all
+
+ receive list 'rx_all':
+ (vcan3: no entry)
+ (vcan2: no entry)
+ (vcan1: no entry)
+ device can_id can_mask function userdata matches ident
+ vcan0 000 00000000 f88e6370 f6c6f400 0 raw
+ (any: no entry)
+
+In this example an application requests any CAN traffic from vcan0::
+
+ rcvlist_all - list for unfiltered entries (no filter operations)
+ rcvlist_eff - list for single extended frame (EFF) entries
+ rcvlist_err - list for error message frames masks
+ rcvlist_fil - list for mask/value filters
+ rcvlist_inv - list for mask/value filters (inverse semantic)
+ rcvlist_sff - list for single standard frame (SFF) entries
+
+Additional procfs files in /proc/net/can::
+
+ stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
+ reset_stats - manual statistic reset
+ version - prints the SocketCAN core version and the ABI version
+
+
+Writing Own CAN Protocol Modules
+--------------------------------
+
+To implement a new protocol in the protocol family PF_CAN a new
+protocol has to be defined in include/linux/can.h .
+The prototypes and definitions to use the SocketCAN core can be
+accessed by including include/linux/can/core.h .
+In addition to functions that register the CAN protocol and the
+CAN device notifier chain there are functions to subscribe CAN
+frames received by CAN interfaces and to send CAN frames::
+
+ can_rx_register - subscribe CAN frames from a specific interface
+ can_rx_unregister - unsubscribe CAN frames from a specific interface
+ can_send - transmit a CAN frame (optional with local loopback)
+
+For details see the kerneldoc documentation in net/can/af_can.c or
+the source code of net/can/raw.c or net/can/bcm.c .
+
+
+CAN Network Drivers
+===================
+
+Writing a CAN network device driver is much easier than writing a
+CAN character device driver. Similar to other known network device
+drivers you mainly have to deal with:
+
+- TX: Put the CAN frame from the socket buffer to the CAN controller.
+- RX: Put the CAN frame from the CAN controller to the socket buffer.
+
+See e.g. at Documentation/networking/netdevices.txt . The differences
+for writing CAN network device driver are described below:
+
+
+General Settings
+----------------
+
+.. code-block:: C
+
+ dev->type = ARPHRD_CAN; /* the netdevice hardware type */
+ dev->flags = IFF_NOARP; /* CAN has no arp */
+
+ dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
+
+ or alternative, when the controller supports CAN with flexible data rate:
+ dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
+
+The struct can_frame or struct canfd_frame is the payload of each socket
+buffer (skbuff) in the protocol family PF_CAN.
+
+
+.. _socketcan-local-loopback2:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As described in :ref:`socketcan-local-loopback1` the CAN network device driver should
+support a local loopback functionality similar to the local echo
+e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
+set to prevent the PF_CAN core from locally echoing sent frames
+(aka loopback) as fallback solution::
+
+ dev->flags = (IFF_NOARP | IFF_ECHO);
+
+
+CAN Controller Hardware Filters
+-------------------------------
+
+To reduce the interrupt load on deep embedded systems some CAN
+controllers support the filtering of CAN IDs or ranges of CAN IDs.
+These hardware filter capabilities vary from controller to
+controller and have to be identified as not feasible in a multi-user
+networking approach. The use of the very controller specific
+hardware filters could make sense in a very dedicated use-case, as a
+filter on driver level would affect all users in the multi-user
+system. The high efficient filter sets inside the PF_CAN core allow
+to set different multiple filters for each socket separately.
+Therefore the use of hardware filters goes to the category 'handmade
+tuning on deep embedded systems'. The author is running a MPC603e
+@133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
+load without any problems ...
+
+
+The Virtual CAN Driver (vcan)
+-----------------------------
+
+Similar to the network loopback devices, vcan offers a virtual local
+CAN interface. A full qualified address on CAN consists of
+
+- a unique CAN Identifier (CAN ID)
+- the CAN bus this CAN ID is transmitted on (e.g. can0)
+
+so in common use cases more than one virtual CAN interface is needed.
+
+The virtual CAN interfaces allow the transmission and reception of CAN
+frames without real CAN controller hardware. Virtual CAN network
+devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
+When compiled as a module the virtual CAN driver module is called vcan.ko
+
+Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
+netlink interface to create vcan network devices. The creation and
+removal of vcan network devices can be managed with the ip(8) tool::
+
+ - Create a virtual CAN network interface:
+ $ ip link add type vcan
+
+ - Create a virtual CAN network interface with a specific name 'vcan42':
+ $ ip link add dev vcan42 type vcan
+
+ - Remove a (virtual CAN) network interface 'vcan42':
+ $ ip link del vcan42
+
+
+The CAN Network Device Driver Interface
+---------------------------------------
+
+The CAN network device driver interface provides a generic interface
+to setup, configure and monitor CAN network devices. The user can then
+configure the CAN device, like setting the bit-timing parameters, via
+the netlink interface using the program "ip" from the "IPROUTE2"
+utility suite. The following chapter describes briefly how to use it.
+Furthermore, the interface uses a common data structure and exports a
+set of common functions, which all real CAN network device drivers
+should use. Please have a look to the SJA1000 or MSCAN driver to
+understand how to use them. The name of the module is can-dev.ko.
+
+
+Netlink interface to set/get devices properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN device must be configured via netlink interface. The supported
+netlink message types are defined and briefly described in
+"include/linux/can/netlink.h". CAN link support for the program "ip"
+of the IPROUTE2 utility suite is available and it can be used as shown
+below:
+
+Setting CAN device properties::
+
+ $ ip link set can0 type can help
+ Usage: ip link set DEVICE type can
+ [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
+ [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
+ phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
+
+ [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
+ [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
+ dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
+
+ [ loopback { on | off } ]
+ [ listen-only { on | off } ]
+ [ triple-sampling { on | off } ]
+ [ one-shot { on | off } ]
+ [ berr-reporting { on | off } ]
+ [ fd { on | off } ]
+ [ fd-non-iso { on | off } ]
+ [ presume-ack { on | off } ]
+
+ [ restart-ms TIME-MS ]
+ [ restart ]
+
+ Where: BITRATE := { 1..1000000 }
+ SAMPLE-POINT := { 0.000..0.999 }
+ TQ := { NUMBER }
+ PROP-SEG := { 1..8 }
+ PHASE-SEG1 := { 1..8 }
+ PHASE-SEG2 := { 1..8 }
+ SJW := { 1..4 }
+ RESTART-MS := { 0 | NUMBER }
+
+Display CAN device details and statistics::
+
+ $ ip -details -statistics link show can0
+ 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
+ link/can
+ can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
+ bitrate 125000 sample_point 0.875
+ tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
+ sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+ clock 8000000
+ re-started bus-errors arbit-lost error-warn error-pass bus-off
+ 41 17457 0 41 42 41
+ RX: bytes packets errors dropped overrun mcast
+ 140859 17608 17457 0 0 0
+ TX: bytes packets errors dropped carrier collsns
+ 861 112 0 41 0 0
+
+More info to the above output:
+
+"<TRIPLE-SAMPLING>"
+ Shows the list of selected CAN controller modes: LOOPBACK,
+ LISTEN-ONLY, or TRIPLE-SAMPLING.
+
+"state ERROR-ACTIVE"
+ The current state of the CAN controller: "ERROR-ACTIVE",
+ "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
+
+"restart-ms 100"
+ Automatic restart delay time. If set to a non-zero value, a
+ restart of the CAN controller will be triggered automatically
+ in case of a bus-off condition after the specified delay time
+ in milliseconds. By default it's off.
+
+"bitrate 125000 sample-point 0.875"
+ Shows the real bit-rate in bits/sec and the sample-point in the
+ range 0.000..0.999. If the calculation of bit-timing parameters
+ is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
+ bit-timing can be defined by setting the "bitrate" argument.
+ Optionally the "sample-point" can be specified. By default it's
+ 0.000 assuming CIA-recommended sample-points.
+
+"tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
+ Shows the time quanta in ns, propagation segment, phase buffer
+ segment 1 and 2 and the synchronisation jump width in units of
+ tq. They allow to define the CAN bit-timing in a hardware
+ independent format as proposed by the Bosch CAN 2.0 spec (see
+ chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
+
+"sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 clock 8000000"
+ Shows the bit-timing constants of the CAN controller, here the
+ "sja1000". The minimum and maximum values of the time segment 1
+ and 2, the synchronisation jump width in units of tq, the
+ bitrate pre-scaler and the CAN system clock frequency in Hz.
+ These constants could be used for user-defined (non-standard)
+ bit-timing calculation algorithms in user-space.
+
+"re-started bus-errors arbit-lost error-warn error-pass bus-off"
+ Shows the number of restarts, bus and arbitration lost errors,
+ and the state changes to the error-warning, error-passive and
+ bus-off state. RX overrun errors are listed in the "overrun"
+ field of the standard network statistics.
+
+Setting the CAN Bit-Timing
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN bit-timing parameters can always be defined in a hardware
+independent format as proposed in the Bosch CAN 2.0 specification
+specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
+and "sjw"::
+
+ $ ip link set canX type can tq 125 prop-seg 6 \
+ phase-seg1 7 phase-seg2 2 sjw 1
+
+If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
+recommended CAN bit-timing parameters will be calculated if the bit-
+rate is specified with the argument "bitrate"::
+
+ $ ip link set canX type can bitrate 125000
+
+Note that this works fine for the most common CAN controllers with
+standard bit-rates but may *fail* for exotic bit-rates or CAN system
+clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
+space and allows user-space tools to solely determine and set the
+bit-timing parameters. The CAN controller specific bit-timing
+constants can be used for that purpose. They are listed by the
+following command::
+
+ $ ip -details link show can0
+ ...
+ sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+
+
+Starting and Stopping the CAN Network Device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A CAN network device is started or stopped as usual with the command
+"ifconfig canX up/down" or "ip link set canX up/down". Be aware that
+you *must* define proper bit-timing parameters for real CAN devices
+before you can start it to avoid error-prone default settings::
+
+ $ ip link set canX up type can bitrate 125000
+
+A device may enter the "bus-off" state if too many errors occurred on
+the CAN bus. Then no more messages are received or sent. An automatic
+bus-off recovery can be enabled by setting the "restart-ms" to a
+non-zero value, e.g.::
+
+ $ ip link set canX type can restart-ms 100
+
+Alternatively, the application may realize the "bus-off" condition
+by monitoring CAN error message frames and do a restart when
+appropriate with the command::
+
+ $ ip link set canX type can restart
+
+Note that a restart will also create a CAN error message frame (see
+also :ref:`socketcan-network-problem-notifications`).
+
+
+.. _socketcan-can-fd-driver:
+
+CAN FD (Flexible Data Rate) Driver Support
+------------------------------------------
+
+CAN FD capable CAN controllers support two different bitrates for the
+arbitration phase and the payload phase of the CAN FD frame. Therefore a
+second bit timing has to be specified in order to enable the CAN FD bitrate.
+
+Additionally CAN FD capable CAN controllers support up to 64 bytes of
+payload. The representation of this length in can_frame.can_dlc and
+canfd_frame.len for userspace applications and inside the Linux network
+layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
+The data length code was a 1:1 mapping to the payload length in the legacy
+CAN frames anyway. The payload length to the bus-relevant DLC mapping is
+only performed inside the CAN drivers, preferably with the helper
+functions can_dlc2len() and can_len2dlc().
+
+The CAN netdevice driver capabilities can be distinguished by the network
+devices maximum transfer unit (MTU)::
+
+ MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
+ MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
+
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+N.B. CAN FD capable devices can also handle and send legacy CAN frames.
+
+When configuring CAN FD capable CAN controllers an additional 'data' bitrate
+has to be set. This bitrate for the data phase of the CAN FD frame has to be
+at least the bitrate which was configured for the arbitration phase. This
+second bitrate is specified analogue to the first bitrate but the bitrate
+setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
+dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
+within the configuration process the controller option "fd on" can be
+specified to enable the CAN FD mode in the CAN controller. This controller
+option also switches the device MTU to 72 (CANFD_MTU).
+
+The first CAN FD specification presented as whitepaper at the International
+CAN Conference 2012 needed to be improved for data integrity reasons.
+Therefore two CAN FD implementations have to be distinguished today:
+
+- ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
+- non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
+
+Finally there are three types of CAN FD controllers:
+
+1. ISO compliant (fixed)
+2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
+3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
+
+The current ISO/non-ISO mode is announced by the CAN controller driver via
+netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
+The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
+switchable CAN FD controllers only.
+
+Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate::
+
+ $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
+ dbitrate 4000000 dsample-point 0.8 fd on
+ $ ip -details link show can0
+ 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
+ mode DEFAULT group default qlen 10
+ link/can promiscuity 0
+ can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+ bitrate 500000 sample-point 0.750
+ tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
+ pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
+ brp-inc 1
+ dbitrate 4000000 dsample-point 0.800
+ dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
+ pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
+ dbrp-inc 1
+ clock 80000000
+
+Example when 'fd-non-iso on' is added on this switchable CAN FD adapter::
+
+ can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+
+
+Supported CAN Hardware
+----------------------
+
+Please check the "Kconfig" file in "drivers/net/can" to get an actual
+list of the support CAN hardware. On the SocketCAN project website
+(see :ref:`socketcan-resources`) there might be further drivers available, also for
+older kernel versions.
+
+
+.. _socketcan-resources:
+
+SocketCAN Resources
+===================
+
+The Linux CAN / SocketCAN project resources (project site / mailing list)
+are referenced in the MAINTAINERS file in the Linux source tree.
+Search for CAN NETWORK [LAYERS|DRIVERS].
+
+Credits
+=======
+
+- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
+- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
+- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
+- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews, CAN device driver interface, MSCAN driver)
+- Robert Schwebel (design reviews, PTXdist integration)
+- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
+- Benedikt Spranger (reviews)
+- Thomas Gleixner (LKML reviews, coding style, posting hints)
+- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
+- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
+- Klaus Hitschler (PEAK driver integration)
+- Uwe Koppe (CAN netdevices with PF_PACKET approach)
+- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
+- Pavel Pisa (Bit-timing calculation)
+- Sascha Hauer (SJA1000 platform driver)
+- Sebastian Haas (SJA1000 EMS PCI driver)
+- Markus Plessing (SJA1000 EMS PCI driver)
+- Per Dalen (SJA1000 Kvaser PCI driver)
+- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
deleted file mode 100644
index aa15b9ee2e70..000000000000
--- a/Documentation/networking/can.txt
+++ /dev/null
@@ -1,1308 +0,0 @@
-============================================================================
-
-can.txt
-
-Readme file for the Controller Area Network Protocol Family (aka SocketCAN)
-
-This file contains
-
- 1 Overview / What is SocketCAN
-
- 2 Motivation / Why using the socket API
-
- 3 SocketCAN concept
- 3.1 receive lists
- 3.2 local loopback of sent frames
- 3.3 network problem notifications
-
- 4 How to use SocketCAN
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
- 4.1.1 RAW socket option CAN_RAW_FILTER
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
- 4.1.7 RAW socket returned message flags
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
- 4.2.1 Broadcast Manager operations
- 4.2.2 Broadcast Manager message flags
- 4.2.3 Broadcast Manager transmission timers
- 4.2.4 Broadcast Manager message sequence transmission
- 4.2.5 Broadcast Manager receive filter timers
- 4.2.6 Broadcast Manager multiplex message receive filter
- 4.2.7 Broadcast Manager CAN FD support
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
- 5 SocketCAN core module
- 5.1 can.ko module params
- 5.2 procfs content
- 5.3 writing own CAN protocol modules
-
- 6 CAN network drivers
- 6.1 general settings
- 6.2 local loopback of sent frames
- 6.3 CAN controller hardware filters
- 6.4 The virtual CAN driver (vcan)
- 6.5 The CAN network device driver interface
- 6.5.1 Netlink interface to set/get devices properties
- 6.5.2 Setting the CAN bit-timing
- 6.5.3 Starting and stopping the CAN network device
- 6.6 CAN FD (flexible data rate) driver support
- 6.7 supported CAN hardware
-
- 7 SocketCAN resources
-
- 8 Credits
-
-============================================================================
-
-1. Overview / What is SocketCAN
---------------------------------
-
-The socketcan package is an implementation of CAN protocols
-(Controller Area Network) for Linux. CAN is a networking technology
-which has widespread use in automation, embedded devices, and
-automotive fields. While there have been other CAN implementations
-for Linux based on character devices, SocketCAN uses the Berkeley
-socket API, the Linux network stack and implements the CAN device
-drivers as network interfaces. The CAN socket API has been designed
-as similar as possible to the TCP/IP protocols to allow programmers,
-familiar with network programming, to easily learn how to use CAN
-sockets.
-
-2. Motivation / Why using the socket API
-----------------------------------------
-
-There have been CAN implementations for Linux before SocketCAN so the
-question arises, why we have started another project. Most existing
-implementations come as a device driver for some CAN hardware, they
-are based on character devices and provide comparatively little
-functionality. Usually, there is only a hardware-specific device
-driver which provides a character device interface to send and
-receive raw CAN frames, directly to/from the controller hardware.
-Queueing of frames and higher-level transport protocols like ISO-TP
-have to be implemented in user space applications. Also, most
-character-device implementations support only one single process to
-open the device at a time, similar to a serial interface. Exchanging
-the CAN controller requires employment of another device driver and
-often the need for adaption of large parts of the application to the
-new driver's API.
-
-SocketCAN was designed to overcome all of these limitations. A new
-protocol family has been implemented which provides a socket interface
-to user space applications and which builds upon the Linux network
-layer, enabling use all of the provided queueing functionality. A device
-driver for CAN controller hardware registers itself with the Linux
-network layer as a network device, so that CAN frames from the
-controller can be passed up to the network layer and on to the CAN
-protocol family module and also vice-versa. Also, the protocol family
-module provides an API for transport protocol modules to register, so
-that any number of transport protocols can be loaded or unloaded
-dynamically. In fact, the can core module alone does not provide any
-protocol and cannot be used without loading at least one additional
-protocol module. Multiple sockets can be opened at the same time,
-on different or the same protocol module and they can listen/send
-frames on different or the same CAN IDs. Several sockets listening on
-the same interface for frames with the same CAN ID are all passed the
-same received matching CAN frames. An application wishing to
-communicate using a specific transport protocol, e.g. ISO-TP, just
-selects that protocol when opening the socket, and then can read and
-write application data byte streams, without having to deal with
-CAN-IDs, frames, etc.
-
-Similar functionality visible from user-space could be provided by a
-character device, too, but this would lead to a technically inelegant
-solution for a couple of reasons:
-
-* Intricate usage. Instead of passing a protocol argument to
- socket(2) and using bind(2) to select a CAN interface and CAN ID, an
- application would have to do all these operations using ioctl(2)s.
-
-* Code duplication. A character device cannot make use of the Linux
- network queueing code, so all that code would have to be duplicated
- for CAN networking.
-
-* Abstraction. In most existing character-device implementations, the
- hardware-specific device driver for a CAN controller directly
- provides the character device for the application to work with.
- This is at least very unusual in Unix systems for both, char and
- block devices. For example you don't have a character device for a
- certain UART of a serial interface, a certain sound chip in your
- computer, a SCSI or IDE controller providing access to your hard
- disk or tape streamer device. Instead, you have abstraction layers
- which provide a unified character or block device interface to the
- application on the one hand, and a interface for hardware-specific
- device drivers on the other hand. These abstractions are provided
- by subsystems like the tty layer, the audio subsystem or the SCSI
- and IDE subsystems for the devices mentioned above.
-
- The easiest way to implement a CAN device driver is as a character
- device without such a (complete) abstraction layer, as is done by most
- existing drivers. The right way, however, would be to add such a
- layer with all the functionality like registering for certain CAN
- IDs, supporting several open file descriptors and (de)multiplexing
- CAN frames between them, (sophisticated) queueing of CAN frames, and
- providing an API for device drivers to register with. However, then
- it would be no more difficult, or may be even easier, to use the
- networking framework provided by the Linux kernel, and this is what
- SocketCAN does.
-
- The use of the networking framework of the Linux kernel is just the
- natural and most appropriate way to implement CAN for Linux.
-
-3. SocketCAN concept
----------------------
-
- As described in chapter 2 it is the main goal of SocketCAN to
- provide a socket interface to user space applications which builds
- upon the Linux network layer. In contrast to the commonly known
- TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
- medium that has no MAC-layer addressing like ethernet. The CAN-identifier
- (can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
- have to be chosen uniquely on the bus. When designing a CAN-ECU
- network the CAN-IDs are mapped to be sent by a specific ECU.
- For this reason a CAN-ID can be treated best as a kind of source address.
-
- 3.1 receive lists
-
- The network transparent access of multiple applications leads to the
- problem that different applications may be interested in the same
- CAN-IDs from the same CAN network interface. The SocketCAN core
- module - which implements the protocol family CAN - provides several
- high efficient receive lists for this reason. If e.g. a user space
- application opens a CAN RAW socket, the raw protocol module itself
- requests the (range of) CAN-IDs from the SocketCAN core that are
- requested by the user. The subscription and unsubscription of
- CAN-IDs can be done for specific CAN interfaces or for all(!) known
- CAN interfaces with the can_rx_(un)register() functions provided to
- CAN protocol modules by the SocketCAN core (see chapter 5).
- To optimize the CPU usage at runtime the receive lists are split up
- into several specific lists per device that match the requested
- filter complexity for a given use-case.
-
- 3.2 local loopback of sent frames
-
- As known from other networking concepts the data exchanging
- applications may run on the same or different nodes without any
- change (except for the according addressing information):
-
- ___ ___ ___ _______ ___
- | _ | | _ | | _ | | _ _ | | _ |
- ||A|| ||B|| ||C|| ||A| |B|| ||C||
- |___| |___| |___| |_______| |___|
- | | | | |
- -----------------(1)- CAN bus -(2)---------------
-
- To ensure that application A receives the same information in the
- example (2) as it would receive in example (1) there is need for
- some kind of local loopback of the sent CAN frames on the appropriate
- node.
-
- The Linux network devices (by default) just can handle the
- transmission and reception of media dependent frames. Due to the
- arbitration on the CAN bus the transmission of a low prio CAN-ID
- may be delayed by the reception of a high prio CAN frame. To
- reflect the correct* traffic on the node the loopback of the sent
- data has to be performed right after a successful transmission. If
- the CAN network interface is not capable of performing the loopback for
- some reason the SocketCAN core can do this task as a fallback solution.
- See chapter 6.2 for details (recommended).
-
- The loopback functionality is enabled by default to reflect standard
- networking behaviour for CAN applications. Due to some requests from
- the RT-SocketCAN group the loopback optionally may be disabled for each
- separate socket. See sockopts from the CAN RAW sockets in chapter 4.1.
-
- * = you really like to have this when you're running analyser tools
- like 'candump' or 'cansniffer' on the (same) node.
-
- 3.3 network problem notifications
-
- The use of the CAN bus may lead to several problems on the physical
- and media access control layer. Detecting and logging of these lower
- layer problems is a vital requirement for CAN users to identify
- hardware issues on the physical transceiver layer as well as
- arbitration problems and error frames caused by the different
- ECUs. The occurrence of detected errors are important for diagnosis
- and have to be logged together with the exact timestamp. For this
- reason the CAN interface driver can generate so called Error Message
- Frames that can optionally be passed to the user application in the
- same way as other CAN frames. Whenever an error on the physical layer
- or the MAC layer is detected (e.g. by the CAN controller) the driver
- creates an appropriate error message frame. Error messages frames can
- be requested by the user application using the common CAN filter
- mechanisms. Inside this filter definition the (interested) type of
- errors may be selected. The reception of error messages is disabled
- by default. The format of the CAN error message frame is briefly
- described in the Linux header file "include/uapi/linux/can/error.h".
-
-4. How to use SocketCAN
-------------------------
-
- Like TCP/IP, you first need to open a socket for communicating over a
- CAN network. Since SocketCAN implements a new protocol family, you
- need to pass PF_CAN as the first argument to the socket(2) system
- call. Currently, there are two CAN protocols to choose from, the raw
- socket protocol and the broadcast manager (BCM). So to open a socket,
- you would write
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- and
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- respectively. After the successful creation of the socket, you would
- normally use the bind(2) system call to bind the socket to a CAN
- interface (which is different from TCP/IP due to different addressing
- - see chapter 3). After binding (CAN_RAW) or connecting (CAN_BCM)
- the socket, you can read(2) and write(2) from/to the socket or use
- send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
- on the socket as usual. There are also CAN specific socket options
- described below.
-
- The basic CAN frame structure and the sockaddr structure are defined
- in include/linux/can.h:
-
- struct can_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
- __u8 __pad; /* padding */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[8] __attribute__((aligned(8)));
- };
-
- The alignment of the (linear) payload data[] to a 64bit boundary
- allows the user to define their own structs and unions to easily access
- the CAN payload. There is no given byteorder on the CAN bus by
- default. A read(2) system call on a CAN_RAW socket transfers a
- struct can_frame to the user space.
-
- The sockaddr_can structure has an interface index like the
- PF_PACKET socket, that also binds to a specific interface:
-
- struct sockaddr_can {
- sa_family_t can_family;
- int can_ifindex;
- union {
- /* transport protocol class address info (e.g. ISOTP) */
- struct { canid_t rx_id, tx_id; } tp;
-
- /* reserved for future CAN protocols address information */
- } can_addr;
- };
-
- To determine the interface index an appropriate ioctl() has to
- be used (example for CAN_RAW sockets without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- strcpy(ifr.ifr_name, "can0" );
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- bind(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- To bind a socket to all(!) CAN interfaces the interface index must
- be 0 (zero). In this case the socket receives CAN frames from every
- enabled CAN interface. To determine the originating CAN interface
- the system call recvfrom(2) may be used instead of read(2). To send
- on a socket that is bound to 'any' interface sendto(2) is needed to
- specify the outgoing interface.
-
- Reading CAN frames from a bound CAN_RAW socket (see above) consists
- of reading a struct can_frame:
-
- struct can_frame frame;
-
- nbytes = read(s, &frame, sizeof(struct can_frame));
-
- if (nbytes < 0) {
- perror("can raw socket read");
- return 1;
- }
-
- /* paranoid check ... */
- if (nbytes < sizeof(struct can_frame)) {
- fprintf(stderr, "read: incomplete CAN frame\n");
- return 1;
- }
-
- /* do something with the received CAN frame */
-
- Writing CAN frames can be done similarly, with the write(2) system call:
-
- nbytes = write(s, &frame, sizeof(struct can_frame));
-
- When the CAN interface is bound to 'any' existing CAN interface
- (addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
- information about the originating CAN interface is needed:
-
- struct sockaddr_can addr;
- struct ifreq ifr;
- socklen_t len = sizeof(addr);
- struct can_frame frame;
-
- nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, &len);
-
- /* get interface name of the received CAN frame */
- ifr.ifr_ifindex = addr.can_ifindex;
- ioctl(s, SIOCGIFNAME, &ifr);
- printf("Received a CAN frame from interface %s", ifr.ifr_name);
-
- To write CAN frames on sockets bound to 'any' CAN interface the
- outgoing interface has to be defined certainly.
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
- addr.can_ifindex = ifr.ifr_ifindex;
- addr.can_family = AF_CAN;
-
- nbytes = sendto(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, sizeof(addr));
-
- An accurate timestamp can be obtained with an ioctl(2) call after reading
- a message from the socket:
-
- struct timeval tv;
- ioctl(s, SIOCGSTAMP, &tv);
-
- The timestamp has a resolution of one microsecond and is set automatically
- at the reception of a CAN frame.
-
- Remark about CAN FD (flexible data rate) support:
-
- Generally the handling of CAN FD is very similar to the formerly described
- examples. The new CAN FD capable CAN controllers support two different
- bitrates for the arbitration phase and the payload phase of the CAN FD frame
- and up to 64 bytes of payload. This extended payload length breaks all the
- kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
- bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
- the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
- switches the socket into a mode that allows the handling of CAN FD frames
- and (legacy) CAN frames simultaneously (see section 4.1.5).
-
- The struct canfd_frame is defined in include/linux/can.h:
-
- struct canfd_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 len; /* frame payload length in byte (0 .. 64) */
- __u8 flags; /* additional flags for CAN FD */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[64] __attribute__((aligned(8)));
- };
-
- The struct canfd_frame and the existing struct can_frame have the can_id,
- the payload length and the payload data at the same offset inside their
- structures. This allows to handle the different structures very similar.
- When the content of a struct can_frame is copied into a struct canfd_frame
- all structure elements can be used as-is - only the data[] becomes extended.
-
- When introducing the struct canfd_frame it turned out that the data length
- code (DLC) of the struct can_frame was used as a length information as the
- length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
- the easy handling of the length information the canfd_frame.len element
- contains a plain length value from 0 .. 64. So both canfd_frame.len and
- can_frame.can_dlc are equal and contain a length information and no DLC.
- For details about the distinction of CAN and CAN FD capable devices and
- the mapping to the bus-relevant data length code (DLC), see chapter 6.6.
-
- The length of the two CAN(FD) frame structures define the maximum transfer
- unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
- definitions are specified for CAN specific MTUs in include/linux/can.h :
-
- #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
- #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
-
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
-
- Using CAN_RAW sockets is extensively comparable to the commonly
- known access to CAN character devices. To meet the new possibilities
- provided by the multi user SocketCAN approach, some reasonable
- defaults are set at RAW socket binding time:
-
- - The filters are set to exactly one filter receiving everything
- - The socket only receives valid data frames (=> no error message frames)
- - The loopback of sent CAN frames is enabled (see chapter 3.2)
- - The socket does not receive its own sent frames (in loopback mode)
-
- These default settings may be changed before or after binding the socket.
- To use the referenced definitions of the socket options for CAN_RAW
- sockets, include <linux/can/raw.h>.
-
- 4.1.1 RAW socket option CAN_RAW_FILTER
-
- The reception of CAN frames using CAN_RAW sockets can be controlled
- by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
-
- The CAN filter structure is defined in include/linux/can.h:
-
- struct can_filter {
- canid_t can_id;
- canid_t can_mask;
- };
-
- A filter matches, when
-
- <received_can_id> & mask == can_id & mask
-
- which is analogous to known CAN controllers hardware filter semantics.
- The filter can be inverted in this semantic, when the CAN_INV_FILTER
- bit is set in can_id element of the can_filter structure. In
- contrast to CAN controller hardware filters the user may set 0 .. n
- receive filters for each open socket separately:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
- rfilter[1].can_id = 0x200;
- rfilter[1].can_mask = 0x700;
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- To disable the reception of CAN frames on the selected CAN_RAW socket:
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
-
- To set the filters to zero filters is quite obsolete as to not read
- data causes the raw socket to discard the received CAN frames. But
- having this 'send only' use-case we may remove the receive list in the
- Kernel to save a little (really a very little!) CPU usage.
-
- 4.1.1.1 CAN filter usage optimisation
-
- The CAN filters are processed in per-device filter lists at CAN frame
- reception time. To reduce the number of checks that need to be performed
- while walking through the filter lists the CAN core provides an optimized
- filter handling when the filter subscription focusses on a single CAN ID.
-
- For the possible 2048 SFF CAN identifiers the identifier is used as an index
- to access the corresponding subscription list without any further checks.
- For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
- hash function to retrieve the EFF table index.
-
- To benefit from the optimized filters for single CAN identifiers the
- CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
- with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
- can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
- subscribed. E.g. in the example from above
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
-
- both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
-
- To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
- filter has to be defined in this way to benefit from the optimized filters:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
- rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
- rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
-
- As described in chapter 3.3 the CAN interface driver can generate so
- called Error Message Frames that can optionally be passed to the user
- application in the same way as other CAN frames. The possible
- errors are divided into different error classes that may be filtered
- using the appropriate error mask. To register for every possible
- error condition CAN_ERR_MASK can be used as value for the error mask.
- The values for the error mask are defined in linux/can/error.h .
-
- can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
- &err_mask, sizeof(err_mask));
-
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
-
- To meet multi user needs the local loopback is enabled by default
- (see chapter 3.2 for details). But in some embedded use-cases
- (e.g. when only one application uses the CAN bus) this loopback
- functionality can be disabled (separately for each socket):
-
- int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
-
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
-
- When the local loopback is enabled, all the sent CAN frames are
- looped back to the open CAN sockets that registered for the CAN
- frames' CAN-ID on this given interface to meet the multi user
- needs. The reception of the CAN frames on the same socket that was
- sending the CAN frame is assumed to be unwanted and therefore
- disabled by default. This default behaviour may be changed on
- demand:
-
- int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
- &recv_own_msgs, sizeof(recv_own_msgs));
-
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-
- CAN FD support in CAN_RAW sockets can be enabled with a new socket option
- CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
- not supported by the CAN_RAW socket (e.g. on older kernels), switching the
- CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
-
- Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
- and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
- when reading from the socket.
-
- CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
- CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
-
- Example:
- [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
-
- struct canfd_frame cfd;
-
- nbytes = read(s, &cfd, CANFD_MTU);
-
- if (nbytes == CANFD_MTU) {
- printf("got CAN FD frame with length %d\n", cfd.len);
- /* cfd.flags contains valid data */
- } else if (nbytes == CAN_MTU) {
- printf("got legacy CAN frame with length %d\n", cfd.len);
- /* cfd.flags is undefined */
- } else {
- fprintf(stderr, "read: invalid CAN(FD) frame\n");
- return 1;
- }
-
- /* the content can be handled independently from the received MTU size */
-
- printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
- for (i = 0; i < cfd.len; i++)
- printf("%02X ", cfd.data[i]);
-
- When reading with size CANFD_MTU only returns CAN_MTU bytes that have
- been received from the socket a legacy CAN frame has been read into the
- provided CAN FD structure. Note that the canfd_frame.flags data field is
- not specified in the struct can_frame and therefore it is only valid in
- CANFD_MTU sized CAN FD frames.
-
- Implementation hint for new CAN applications:
-
- To build a CAN FD aware application use struct canfd_frame as basic CAN
- data structure for CAN_RAW based applications. When the application is
- executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
- socket option returns an error: No problem. You'll get legacy CAN frames
- or CAN FD frames and can process them the same way.
-
- When sending to CAN devices make sure that the device is capable to handle
- CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
-
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
-
- The CAN_RAW socket can set multiple CAN identifier specific filters that
- lead to multiple filters in the af_can.c filter processing. These filters
- are indenpendent from each other which leads to logical OR'ed filters when
- applied (see 4.1.1).
-
- This socket option joines the given CAN filters in the way that only CAN
- frames are passed to user space that matched *all* given CAN filters. The
- semantic for the applied filters is therefore changed to a logical AND.
-
- This is useful especially when the filterset is a combination of filters
- where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
- CAN ID ranges from the incoming traffic.
-
- 4.1.7 RAW socket returned message flags
-
- When using recvmsg() call, the msg->msg_flags may contain following flags:
-
- MSG_DONTROUTE: set when the received frame was created on the local host.
-
- MSG_CONFIRM: set when the frame was sent via the socket it is received on.
- This flag can be interpreted as a 'transmission confirmation' when the
- CAN driver supports the echo of frames on driver level, see 3.2 and 6.2.
- In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
-
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
-
- The Broadcast Manager protocol provides a command based configuration
- interface to filter and send (e.g. cyclic) CAN messages in kernel space.
-
- Receive filters can be used to down sample frequent messages; detect events
- such as message contents changes, packet length changes, and do time-out
- monitoring of received messages.
-
- Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
- created and modified at runtime; both the message content and the two
- possible transmit intervals can be altered.
-
- A BCM socket is not intended for sending individual CAN frames using the
- struct can_frame as known from the CAN_RAW socket. Instead a special BCM
- configuration message is defined. The basic BCM configuration message used
- to communicate with the broadcast manager and the available operations are
- defined in the linux/can/bcm.h include. The BCM message consists of a
- message header with a command ('opcode') followed by zero or more CAN frames.
- The broadcast manager sends responses to user space in the same form:
-
- struct bcm_msg_head {
- __u32 opcode; /* command */
- __u32 flags; /* special flags */
- __u32 count; /* run 'count' times with ival1 */
- struct timeval ival1, ival2; /* count and subsequent interval */
- canid_t can_id; /* unique can_id for task */
- __u32 nframes; /* number of can_frames following */
- struct can_frame frames[0];
- };
-
- The aligned payload 'frames' uses the same basic CAN frame structure defined
- at the beginning of section 4 and in the include/linux/can.h include. All
- messages to the broadcast manager from user space have this structure.
-
- Note a CAN_BCM socket must be connected instead of bound after socket
- creation (example without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- connect(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- The broadcast manager socket is able to handle any number of in flight
- transmissions or receive filters concurrently. The different RX/TX jobs are
- distinguished by the unique can_id in each BCM message. However additional
- CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
- When the broadcast manager socket is bound to 'any' CAN interface (=> the
- interface index is set to zero) the configured receive filters apply to any
- CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
- interface index. When using recvfrom() instead of read() to retrieve BCM
- socket messages the originating CAN interface is provided in can_ifindex.
-
- 4.2.1 Broadcast Manager operations
-
- The opcode defines the operation for the broadcast manager to carry out,
- or details the broadcast managers response to several events, including
- user requests.
-
- Transmit Operations (user space to broadcast manager):
-
- TX_SETUP: Create (cyclic) transmission task.
-
- TX_DELETE: Remove (cyclic) transmission task, requires only can_id.
-
- TX_READ: Read properties of (cyclic) transmission task for can_id.
-
- TX_SEND: Send one CAN frame.
-
- Transmit Responses (broadcast manager to user space):
-
- TX_STATUS: Reply to TX_READ request (transmission task configuration).
-
- TX_EXPIRED: Notification when counter finishes sending at initial interval
- 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
-
- Receive Operations (user space to broadcast manager):
-
- RX_SETUP: Create RX content filter subscription.
-
- RX_DELETE: Remove RX content filter subscription, requires only can_id.
-
- RX_READ: Read properties of RX content filter subscription for can_id.
-
- Receive Responses (broadcast manager to user space):
-
- RX_STATUS: Reply to RX_READ request (filter task configuration).
-
- RX_TIMEOUT: Cyclic message is detected to be absent (timer ival1 expired).
-
- RX_CHANGED: BCM message with updated CAN frame (detected content change).
- Sent on first message received or on receipt of revised CAN messages.
-
- 4.2.2 Broadcast Manager message flags
-
- When sending a message to the broadcast manager the 'flags' element may
- contain the following flag definitions which influence the behaviour:
-
- SETTIMER: Set the values of ival1, ival2 and count
-
- STARTTIMER: Start the timer with the actual values of ival1, ival2
- and count. Starting the timer leads simultaneously to emit a CAN frame.
-
- TX_COUNTEVT: Create the message TX_EXPIRED when count expires
-
- TX_ANNOUNCE: A change of data by the process is emitted immediately.
-
- TX_CP_CAN_ID: Copies the can_id from the message header to each
- subsequent frame in frames. This is intended as usage simplification. For
- TX tasks the unique can_id from the message header may differ from the
- can_id(s) stored for transmission in the subsequent struct can_frame(s).
-
- RX_FILTER_ID: Filter by can_id alone, no frames required (nframes=0).
-
- RX_CHECK_DLC: A change of the DLC leads to an RX_CHANGED.
-
- RX_NO_AUTOTIMER: Prevent automatically starting the timeout monitor.
-
- RX_ANNOUNCE_RESUME: If passed at RX_SETUP and a receive timeout occurred, a
- RX_CHANGED message will be generated when the (cyclic) receive restarts.
-
- TX_RESET_MULTI_IDX: Reset the index for the multiple frame transmission.
-
- RX_RTR_FRAME: Send reply for RTR-request (placed in op->frames[0]).
-
- 4.2.3 Broadcast Manager transmission timers
-
- Periodic transmission configurations may use up to two interval timers.
- In this case the BCM sends a number of messages ('count') at an interval
- 'ival1', then continuing to send at another given interval 'ival2'. When
- only one timer is needed 'count' is set to zero and only 'ival2' is used.
- When SET_TIMER and START_TIMER flag were set the timers are activated.
- The timer values can be altered at runtime when only SET_TIMER is set.
-
- 4.2.4 Broadcast Manager message sequence transmission
-
- Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
- TX task configuration. The number of CAN frames is provided in the 'nframes'
- element of the BCM message head. The defined number of CAN frames are added
- as array to the TX_SETUP BCM configuration message.
-
- /* create a struct to set up a sequence of four CAN frames */
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[4];
- } mytxmsg;
-
- (..)
- mytxmsg.msg_head.nframes = 4;
- (..)
-
- write(s, &mytxmsg, sizeof(mytxmsg));
-
- With every transmission the index in the array of CAN frames is increased
- and set to zero at index overflow.
-
- 4.2.5 Broadcast Manager receive filter timers
-
- The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
- When the SET_TIMER flag is set the timers are enabled:
-
- ival1: Send RX_TIMEOUT when a received message is not received again within
- the given time. When START_TIMER is set at RX_SETUP the timeout detection
- is activated directly - even without a former CAN frame reception.
-
- ival2: Throttle the received message rate down to the value of ival2. This
- is useful to reduce messages for the application when the signal inside the
- CAN frame is stateless as state changes within the ival2 periode may get
- lost.
-
- 4.2.6 Broadcast Manager multiplex message receive filter
-
- To filter for content changes in multiplex message sequences an array of more
- than one CAN frames can be passed in a RX_SETUP configuration message. The
- data bytes of the first CAN frame contain the mask of relevant bits that
- have to match in the subsequent CAN frames with the received CAN frame.
- If one of the subsequent CAN frames is matching the bits in that frame data
- mark the relevant content to be compared with the previous received content.
- Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
- filters) can be added as array to the TX_SETUP BCM configuration message.
-
- /* usually used to clear CAN frame data[] - beware of endian problems! */
- #define U64_DATA(p) (*(unsigned long long*)(p)->data)
-
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = 0;
- msg.msg_head.nframes = 5;
- U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
- U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
- U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
- U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
- U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
-
- write(s, &msg, sizeof(msg));
-
- 4.2.7 Broadcast Manager CAN FD support
-
- The programming API of the CAN_BCM depends on struct can_frame which is
- given as array directly behind the bcm_msg_head structure. To follow this
- schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
- flags indicates that the concatenated CAN frame structures behind the
- bcm_msg_head are defined as struct canfd_frame.
-
- struct {
- struct bcm_msg_head msg_head;
- struct canfd_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = CAN_FD_FRAME;
- msg.msg_head.nframes = 5;
- (..)
-
- When using CAN FD frames for multiplex filtering the MUX mask is still
- expected in the first 64 bit of the struct canfd_frame data section.
-
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
-
-5. SocketCAN core module
--------------------------
-
- The SocketCAN core module implements the protocol family
- PF_CAN. CAN protocol modules are loaded by the core module at
- runtime. The core module provides an interface for CAN protocol
- modules to subscribe needed CAN IDs (see chapter 3.1).
-
- 5.1 can.ko module params
-
- - stats_timer: To calculate the SocketCAN core statistics
- (e.g. current/maximum frames per second) this 1 second timer is
- invoked at can.ko module start time by default. This timer can be
- disabled by using stattimer=0 on the module commandline.
-
- - debug: (removed since SocketCAN SVN r546)
-
- 5.2 procfs content
-
- As described in chapter 3.1 the SocketCAN core uses several filter
- lists to deliver received CAN frames to CAN protocol modules. These
- receive lists, their filters and the count of filter matches can be
- checked in the appropriate receive list. All entries contain the
- device and a protocol module identifier:
-
- foo@bar:~$ cat /proc/net/can/rcvlist_all
-
- receive list 'rx_all':
- (vcan3: no entry)
- (vcan2: no entry)
- (vcan1: no entry)
- device can_id can_mask function userdata matches ident
- vcan0 000 00000000 f88e6370 f6c6f400 0 raw
- (any: no entry)
-
- In this example an application requests any CAN traffic from vcan0.
-
- rcvlist_all - list for unfiltered entries (no filter operations)
- rcvlist_eff - list for single extended frame (EFF) entries
- rcvlist_err - list for error message frames masks
- rcvlist_fil - list for mask/value filters
- rcvlist_inv - list for mask/value filters (inverse semantic)
- rcvlist_sff - list for single standard frame (SFF) entries
-
- Additional procfs files in /proc/net/can
-
- stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
- reset_stats - manual statistic reset
- version - prints the SocketCAN core version and the ABI version
-
- 5.3 writing own CAN protocol modules
-
- To implement a new protocol in the protocol family PF_CAN a new
- protocol has to be defined in include/linux/can.h .
- The prototypes and definitions to use the SocketCAN core can be
- accessed by including include/linux/can/core.h .
- In addition to functions that register the CAN protocol and the
- CAN device notifier chain there are functions to subscribe CAN
- frames received by CAN interfaces and to send CAN frames:
-
- can_rx_register - subscribe CAN frames from a specific interface
- can_rx_unregister - unsubscribe CAN frames from a specific interface
- can_send - transmit a CAN frame (optional with local loopback)
-
- For details see the kerneldoc documentation in net/can/af_can.c or
- the source code of net/can/raw.c or net/can/bcm.c .
-
-6. CAN network drivers
-----------------------
-
- Writing a CAN network device driver is much easier than writing a
- CAN character device driver. Similar to other known network device
- drivers you mainly have to deal with:
-
- - TX: Put the CAN frame from the socket buffer to the CAN controller.
- - RX: Put the CAN frame from the CAN controller to the socket buffer.
-
- See e.g. at Documentation/networking/netdevices.txt . The differences
- for writing CAN network device driver are described below:
-
- 6.1 general settings
-
- dev->type = ARPHRD_CAN; /* the netdevice hardware type */
- dev->flags = IFF_NOARP; /* CAN has no arp */
-
- dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
-
- or alternative, when the controller supports CAN with flexible data rate:
- dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
-
- The struct can_frame or struct canfd_frame is the payload of each socket
- buffer (skbuff) in the protocol family PF_CAN.
-
- 6.2 local loopback of sent frames
-
- As described in chapter 3.2 the CAN network device driver should
- support a local loopback functionality similar to the local echo
- e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
- set to prevent the PF_CAN core from locally echoing sent frames
- (aka loopback) as fallback solution:
-
- dev->flags = (IFF_NOARP | IFF_ECHO);
-
- 6.3 CAN controller hardware filters
-
- To reduce the interrupt load on deep embedded systems some CAN
- controllers support the filtering of CAN IDs or ranges of CAN IDs.
- These hardware filter capabilities vary from controller to
- controller and have to be identified as not feasible in a multi-user
- networking approach. The use of the very controller specific
- hardware filters could make sense in a very dedicated use-case, as a
- filter on driver level would affect all users in the multi-user
- system. The high efficient filter sets inside the PF_CAN core allow
- to set different multiple filters for each socket separately.
- Therefore the use of hardware filters goes to the category 'handmade
- tuning on deep embedded systems'. The author is running a MPC603e
- @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
- load without any problems ...
-
- 6.4 The virtual CAN driver (vcan)
-
- Similar to the network loopback devices, vcan offers a virtual local
- CAN interface. A full qualified address on CAN consists of
-
- - a unique CAN Identifier (CAN ID)
- - the CAN bus this CAN ID is transmitted on (e.g. can0)
-
- so in common use cases more than one virtual CAN interface is needed.
-
- The virtual CAN interfaces allow the transmission and reception of CAN
- frames without real CAN controller hardware. Virtual CAN network
- devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
- When compiled as a module the virtual CAN driver module is called vcan.ko
-
- Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
- netlink interface to create vcan network devices. The creation and
- removal of vcan network devices can be managed with the ip(8) tool:
-
- - Create a virtual CAN network interface:
- $ ip link add type vcan
-
- - Create a virtual CAN network interface with a specific name 'vcan42':
- $ ip link add dev vcan42 type vcan
-
- - Remove a (virtual CAN) network interface 'vcan42':
- $ ip link del vcan42
-
- 6.5 The CAN network device driver interface
-
- The CAN network device driver interface provides a generic interface
- to setup, configure and monitor CAN network devices. The user can then
- configure the CAN device, like setting the bit-timing parameters, via
- the netlink interface using the program "ip" from the "IPROUTE2"
- utility suite. The following chapter describes briefly how to use it.
- Furthermore, the interface uses a common data structure and exports a
- set of common functions, which all real CAN network device drivers
- should use. Please have a look to the SJA1000 or MSCAN driver to
- understand how to use them. The name of the module is can-dev.ko.
-
- 6.5.1 Netlink interface to set/get devices properties
-
- The CAN device must be configured via netlink interface. The supported
- netlink message types are defined and briefly described in
- "include/linux/can/netlink.h". CAN link support for the program "ip"
- of the IPROUTE2 utility suite is available and it can be used as shown
- below:
-
- - Setting CAN device properties:
-
- $ ip link set can0 type can help
- Usage: ip link set DEVICE type can
- [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
- [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
- phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
-
- [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
- [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
- dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
-
- [ loopback { on | off } ]
- [ listen-only { on | off } ]
- [ triple-sampling { on | off } ]
- [ one-shot { on | off } ]
- [ berr-reporting { on | off } ]
- [ fd { on | off } ]
- [ fd-non-iso { on | off } ]
- [ presume-ack { on | off } ]
-
- [ restart-ms TIME-MS ]
- [ restart ]
-
- Where: BITRATE := { 1..1000000 }
- SAMPLE-POINT := { 0.000..0.999 }
- TQ := { NUMBER }
- PROP-SEG := { 1..8 }
- PHASE-SEG1 := { 1..8 }
- PHASE-SEG2 := { 1..8 }
- SJW := { 1..4 }
- RESTART-MS := { 0 | NUMBER }
-
- - Display CAN device details and statistics:
-
- $ ip -details -statistics link show can0
- 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
- link/can
- can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
- bitrate 125000 sample_point 0.875
- tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
- sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000
- re-started bus-errors arbit-lost error-warn error-pass bus-off
- 41 17457 0 41 42 41
- RX: bytes packets errors dropped overrun mcast
- 140859 17608 17457 0 0 0
- TX: bytes packets errors dropped carrier collsns
- 861 112 0 41 0 0
-
- More info to the above output:
-
- "<TRIPLE-SAMPLING>"
- Shows the list of selected CAN controller modes: LOOPBACK,
- LISTEN-ONLY, or TRIPLE-SAMPLING.
-
- "state ERROR-ACTIVE"
- The current state of the CAN controller: "ERROR-ACTIVE",
- "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
-
- "restart-ms 100"
- Automatic restart delay time. If set to a non-zero value, a
- restart of the CAN controller will be triggered automatically
- in case of a bus-off condition after the specified delay time
- in milliseconds. By default it's off.
-
- "bitrate 125000 sample-point 0.875"
- Shows the real bit-rate in bits/sec and the sample-point in the
- range 0.000..0.999. If the calculation of bit-timing parameters
- is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
- bit-timing can be defined by setting the "bitrate" argument.
- Optionally the "sample-point" can be specified. By default it's
- 0.000 assuming CIA-recommended sample-points.
-
- "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
- Shows the time quanta in ns, propagation segment, phase buffer
- segment 1 and 2 and the synchronisation jump width in units of
- tq. They allow to define the CAN bit-timing in a hardware
- independent format as proposed by the Bosch CAN 2.0 spec (see
- chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
-
- "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000"
- Shows the bit-timing constants of the CAN controller, here the
- "sja1000". The minimum and maximum values of the time segment 1
- and 2, the synchronisation jump width in units of tq, the
- bitrate pre-scaler and the CAN system clock frequency in Hz.
- These constants could be used for user-defined (non-standard)
- bit-timing calculation algorithms in user-space.
-
- "re-started bus-errors arbit-lost error-warn error-pass bus-off"
- Shows the number of restarts, bus and arbitration lost errors,
- and the state changes to the error-warning, error-passive and
- bus-off state. RX overrun errors are listed in the "overrun"
- field of the standard network statistics.
-
- 6.5.2 Setting the CAN bit-timing
-
- The CAN bit-timing parameters can always be defined in a hardware
- independent format as proposed in the Bosch CAN 2.0 specification
- specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
- and "sjw":
-
- $ ip link set canX type can tq 125 prop-seg 6 \
- phase-seg1 7 phase-seg2 2 sjw 1
-
- If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
- recommended CAN bit-timing parameters will be calculated if the bit-
- rate is specified with the argument "bitrate":
-
- $ ip link set canX type can bitrate 125000
-
- Note that this works fine for the most common CAN controllers with
- standard bit-rates but may *fail* for exotic bit-rates or CAN system
- clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
- space and allows user-space tools to solely determine and set the
- bit-timing parameters. The CAN controller specific bit-timing
- constants can be used for that purpose. They are listed by the
- following command:
-
- $ ip -details link show can0
- ...
- sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
-
- 6.5.3 Starting and stopping the CAN network device
-
- A CAN network device is started or stopped as usual with the command
- "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
- you *must* define proper bit-timing parameters for real CAN devices
- before you can start it to avoid error-prone default settings:
-
- $ ip link set canX up type can bitrate 125000
-
- A device may enter the "bus-off" state if too many errors occurred on
- the CAN bus. Then no more messages are received or sent. An automatic
- bus-off recovery can be enabled by setting the "restart-ms" to a
- non-zero value, e.g.:
-
- $ ip link set canX type can restart-ms 100
-
- Alternatively, the application may realize the "bus-off" condition
- by monitoring CAN error message frames and do a restart when
- appropriate with the command:
-
- $ ip link set canX type can restart
-
- Note that a restart will also create a CAN error message frame (see
- also chapter 3.3).
-
- 6.6 CAN FD (flexible data rate) driver support
-
- CAN FD capable CAN controllers support two different bitrates for the
- arbitration phase and the payload phase of the CAN FD frame. Therefore a
- second bit timing has to be specified in order to enable the CAN FD bitrate.
-
- Additionally CAN FD capable CAN controllers support up to 64 bytes of
- payload. The representation of this length in can_frame.can_dlc and
- canfd_frame.len for userspace applications and inside the Linux network
- layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
- The data length code was a 1:1 mapping to the payload length in the legacy
- CAN frames anyway. The payload length to the bus-relevant DLC mapping is
- only performed inside the CAN drivers, preferably with the helper
- functions can_dlc2len() and can_len2dlc().
-
- The CAN netdevice driver capabilities can be distinguished by the network
- devices maximum transfer unit (MTU):
-
- MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
- MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
-
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
- N.B. CAN FD capable devices can also handle and send legacy CAN frames.
-
- When configuring CAN FD capable CAN controllers an additional 'data' bitrate
- has to be set. This bitrate for the data phase of the CAN FD frame has to be
- at least the bitrate which was configured for the arbitration phase. This
- second bitrate is specified analogue to the first bitrate but the bitrate
- setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
- dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
- within the configuration process the controller option "fd on" can be
- specified to enable the CAN FD mode in the CAN controller. This controller
- option also switches the device MTU to 72 (CANFD_MTU).
-
- The first CAN FD specification presented as whitepaper at the International
- CAN Conference 2012 needed to be improved for data integrity reasons.
- Therefore two CAN FD implementations have to be distinguished today:
-
- - ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
- - non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
-
- Finally there are three types of CAN FD controllers:
-
- 1. ISO compliant (fixed)
- 2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
- 3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
-
- The current ISO/non-ISO mode is announced by the CAN controller driver via
- netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
- The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
- switchable CAN FD controllers only.
-
- Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate:
-
- $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
- dbitrate 4000000 dsample-point 0.8 fd on
- $ ip -details link show can0
- 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
- mode DEFAULT group default qlen 10
- link/can promiscuity 0
- can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
- bitrate 500000 sample-point 0.750
- tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
- pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
- brp-inc 1
- dbitrate 4000000 dsample-point 0.800
- dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
- pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
- dbrp-inc 1
- clock 80000000
-
- Example when 'fd-non-iso on' is added on this switchable CAN FD adapter:
- can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
-
- 6.7 Supported CAN hardware
-
- Please check the "Kconfig" file in "drivers/net/can" to get an actual
- list of the support CAN hardware. On the SocketCAN project website
- (see chapter 7) there might be further drivers available, also for
- older kernel versions.
-
-7. SocketCAN resources
------------------------
-
- The Linux CAN / SocketCAN project resources (project site / mailing list)
- are referenced in the MAINTAINERS file in the Linux source tree.
- Search for CAN NETWORK [LAYERS|DRIVERS].
-
-8. Credits
-----------
-
- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
- CAN device driver interface, MSCAN driver)
- Robert Schwebel (design reviews, PTXdist integration)
- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
- Benedikt Spranger (reviews)
- Thomas Gleixner (LKML reviews, coding style, posting hints)
- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
- Klaus Hitschler (PEAK driver integration)
- Uwe Koppe (CAN netdevices with PF_PACKET approach)
- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
- Pavel Pisa (Bit-timing calculation)
- Sascha Hauer (SJA1000 platform driver)
- Sebastian Haas (SJA1000 EMS PCI driver)
- Markus Plessing (SJA1000 EMS PCI driver)
- Per Dalen (SJA1000 Kvaser PCI driver)
- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index b8b40753133e..25170ad7d25b 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -385,11 +385,6 @@ Switch configuration
avoid relying on what a previous software agent such as a bootloader/firmware
may have previously configured.
-- set_addr: Some switches require the programming of the management interface's
- Ethernet MAC address, switch drivers can also disable ageing of MAC addresses
- on the management interface and "hardcode"/"force" this MAC address for the
- CPU/management interface as an optimization
-
PHY devices and link management
-------------------------------
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 87814859cfc2..a4508ec1816b 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1134,7 +1134,7 @@ The verifier's knowledge about the variable offset consists of:
mask and value; no bit should ever be 1 in both. For example, if a byte is read
into a register from memory, the register's top 56 bits are known zero, while
the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
-then OR this with 0x40, we get (0x40; 0xcf), then if we add 1 we get (0x0;
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
0x1ff), because of potential carries.
Besides arithmetic, the register state can also be updated by conditional
branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
index 057e9fdbfac9..e74d8e1da0e2 100644
--- a/Documentation/networking/ieee802154.txt
+++ b/Documentation/networking/ieee802154.txt
@@ -97,6 +97,46 @@ The include/net/mac802154.h defines following functions:
- void ieee802154_unregister_hw(struct ieee802154_hw *hw):
freeing registered PHY
+ - void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
+ u8 lqi):
+ telling 802.15.4 module there is a new received frame in the skb with
+ the RF Link Quality Indicator (LQI) from the hardware device
+
+ - void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
+ bool ifs_handling):
+ telling 802.15.4 module the frame in the skb is or going to be
+ transmitted through the hardware device
+
+The device driver must implement the following callbacks in the IEEE 802.15.4
+operations structure at least:
+struct ieee802154_ops {
+ ...
+ int (*start)(struct ieee802154_hw *hw);
+ void (*stop)(struct ieee802154_hw *hw);
+ ...
+ int (*xmit_async)(struct ieee802154_hw *hw, struct sk_buff *skb);
+ int (*ed)(struct ieee802154_hw *hw, u8 *level);
+ int (*set_channel)(struct ieee802154_hw *hw, u8 page, u8 channel);
+ ...
+};
+
+ - int start(struct ieee802154_hw *hw):
+ handler that 802.15.4 module calls for the hardware device initialization.
+
+ - void stop(struct ieee802154_hw *hw):
+ handler that 802.15.4 module calls for the hardware device cleanup.
+
+ - int xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb):
+ handler that 802.15.4 module calls for each frame in the skb going to be
+ transmitted through the hardware device.
+
+ - int ed(struct ieee802154_hw *hw, u8 *level):
+ handler that 802.15.4 module calls for Energy Detection from the hardware
+ device.
+
+ - int set_channel(struct ieee802154_hw *hw, u8 page, u8 channel):
+ set radio for listening on specific channel of the hardware device.
+
Moreover IEEE 802.15.4 device operations structure should be filled.
Fake drivers
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 66e620866245..90966c2692d8 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -7,8 +7,10 @@ Contents:
:maxdepth: 2
batman-adv
+ can
kapi
z8530book
+ msg_zerocopy
.. only:: subproject
@@ -16,4 +18,3 @@ Contents:
=======
* :ref:`genindex`
-
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 46c7e1085efc..3f2c40d8e6aa 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -606,6 +606,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
This time period will grow exponentially when more blackhole issues
get detected right after Fastopen is re-enabled and will reset to
initial value when the blackhole issue goes away.
+ 0 to disable the blackhole detection.
By default, it is set to 1hr.
tcp_syn_retries - INTEGER
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst
index 580289f345da..f03ae64be8bc 100644
--- a/Documentation/networking/kapi.rst
+++ b/Documentation/networking/kapi.rst
@@ -145,3 +145,27 @@ PHY Support
.. kernel-doc:: drivers/net/phy/mdio_bus.c
:internal:
+
+PHYLINK
+-------
+
+ PHYLINK interfaces traditional network drivers with PHYLIB, fixed-links,
+ and SFF modules (eg, hot-pluggable SFP) that may contain PHYs. PHYLINK
+ provides management of the link state and link modes.
+
+.. kernel-doc:: include/linux/phylink.h
+ :internal:
+
+.. kernel-doc:: drivers/net/phy/phylink.c
+
+SFP support
+-----------
+
+.. kernel-doc:: drivers/net/phy/sfp-bus.c
+ :internal:
+
+.. kernel-doc:: include/linux/sfp.h
+ :internal:
+
+.. kernel-doc:: drivers/net/phy/sfp-bus.c
+ :export:
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 77f6d7e25cfd..291a01264967 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
error(1, errno, "setsockopt zerocopy");
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be set
+to the listening socket and it will be inherited by the accepted sockets.
Transmission
------------
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
index 7413eb05223b..c77f9d57eb91 100644
--- a/Documentation/networking/netdev-features.txt
+++ b/Documentation/networking/netdev-features.txt
@@ -163,3 +163,12 @@ This requests that the NIC receive all possible frames, including errored
frames (such as bad FCS, etc). This can be helpful when sniffing a link with
bad packets on it. Some NICs may receive more packets if also put into normal
PROMISC mode.
+
+* rx-gro-hw
+
+This requests that the NIC enables Hardware GRO (generic receive offload).
+Hardware GRO is basically the exact reverse of TSO, and is generally
+stricter than Hardware LRO. A packet stream merged by Hardware GRO must
+be re-segmentable by GSO or TSO back to the exact original packet stream.
+Hardware GRO is dependent on RXCSUM since every packet successfully merged
+by hardware must also have the checksum verified by hardware.
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 2c4e3354e128..d2fd78f85aa4 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -12,8 +12,8 @@ suitable sample script and configure that.
On a dual CPU:
ps aux | grep pkt
-root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0]
-root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1]
+root 129 0.3 0.0 0 0 ? SW 2003 523:20 [kpktgend_0]
+root 130 0.3 0.0 0 0 ? SW 2003 509:50 [kpktgend_1]
For monitoring and control pktgen creates:
@@ -113,9 +113,16 @@ Configuring devices
===================
This is done via the /proc interface, and most easily done via pgset
as defined in the sample scripts.
+You need to specify PGDEV environment variable to use functions from sample
+scripts, i.e.:
+export PGDEV=/proc/net/pktgen/eth4@0
+source samples/pktgen/functions.sh
Examples:
+ pg_ctrl start starts injection.
+ pg_ctrl stop aborts injection. Also, ^C aborts generator.
+
pgset "clone_skb 1" sets the number of copies of the same packet
pgset "clone_skb 0" use single SKB for all transmits
pgset "burst 8" uses xmit_more API to queue 8 copies of the same
@@ -165,8 +172,12 @@ Examples:
IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
NODE_ALLOC # node specific memory allocation
NO_TIMESTAMP # disable timestamping
+ pgset 'flag ![name]' Clear a flag to determine behaviour.
+ Note that you might need to use single quote in
+ interactive mode, so that your shell wouldn't expand
+ the specified flag as a history command.
- pgset spi SPI_VALUE Set specific SA used to transform packet.
+ pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
cycle through the port range.
@@ -207,8 +218,6 @@ Examples:
pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
- pgset stop aborts injection. Also, ^C aborts generator.
-
pgset "rate 300M" set rate to 300 Mb/s
pgset "ratep 1000000" set rate to 1Mpps
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
new file mode 100644
index 000000000000..50c34ca65efe
--- /dev/null
+++ b/Documentation/networking/xfrm_device.txt
@@ -0,0 +1,135 @@
+
+===============================================
+XFRM device - offloading the IPsec computations
+===============================================
+Shannon Nelson <shannon.nelson@oracle.com>
+
+
+Overview
+========
+
+IPsec is a useful feature for securing network traffic, but the
+computational cost is high: a 10Gbps link can easily be brought down
+to under 1Gbps, depending on the traffic and link configuration.
+Luckily, there are NICs that offer a hardware based IPsec offload which
+can radically increase throughput and decrease CPU utilization. The XFRM
+Device interface allows NIC drivers to offer to the stack access to the
+hardware offload.
+
+Userland access to the offload is typically through a system such as
+libreswan or KAME/raccoon, but the iproute2 'ip xfrm' command set can
+be handy when experimenting. An example command might look something
+like this:
+
+ ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
+ reqid 0x07 replay-window 32 \
+ aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
+ sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp \
+ offload dev eth4 dir in
+
+Yes, that's ugly, but that's what shell scripts and/or libreswan are for.
+
+
+
+Callbacks to implement
+======================
+
+/* from include/linux/netdevice.h */
+struct xfrmdev_ops {
+ int (*xdo_dev_state_add) (struct xfrm_state *x);
+ void (*xdo_dev_state_delete) (struct xfrm_state *x);
+ void (*xdo_dev_state_free) (struct xfrm_state *x);
+ bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
+ struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+};
+
+The NIC driver offering ipsec offload will need to implement these
+callbacks to make the offload available to the network stack's
+XFRM subsytem. Additionally, the feature bits NETIF_F_HW_ESP and
+NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
+
+
+
+Flow
+====
+
+At probe time and before the call to register_netdev(), the driver should
+set up local data structures and XFRM callbacks, and set the feature bits.
+The XFRM code's listener will finish the setup on NETDEV_REGISTER.
+
+ adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+ adapter->netdev->features |= NETIF_F_HW_ESP;
+ adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+When new SAs are set up with a request for "offload" feature, the
+driver's xdo_dev_state_add() will be given the new SA to be offloaded
+and an indication of whether it is for Rx or Tx. The driver should
+ - verify the algorithm is supported for offloads
+ - store the SA information (key, salt, target-ip, protocol, etc)
+ - enable the HW offload of the SA
+
+The driver can also set an offload_handle in the SA, an opaque void pointer
+that can be used to convey context into the fast-path offload requests.
+
+ xs->xso.offload_handle = context;
+
+
+When the network stack is preparing an IPsec packet for an SA that has
+been setup for offload, it first calls into xdo_dev_offload_ok() with
+the skb and the intended offload state to ask the driver if the offload
+will serviceable. This can check the packet information to be sure the
+offload can be supported (e.g. IPv4 or IPv6, no IPv4 options, etc) and
+return true of false to signify its support.
+
+When ready to send, the driver needs to inspect the Tx packet for the
+offload information, including the opaque context, and set up the packet
+send accordingly.
+
+ xs = xfrm_input_state(skb);
+ context = xs->xso.offload_handle;
+ set up HW for send
+
+The stack has already inserted the appropriate IPsec headers in the
+packet data, the offload just needs to do the encryption and fix up the
+header values.
+
+
+When a packet is received and the HW has indicated that it offloaded a
+decryption, the driver needs to add a reference to the decoded SA into
+the packet's skb. At this point the data should be decrypted but the
+IPsec headers are still in the packet data; they are removed later up
+the stack in xfrm_input().
+
+ find and hold the SA that was used to the Rx skb
+ get spi, protocol, and destination IP from packet headers
+ xs = find xs from (spi, protocol, dest_IP)
+ xfrm_state_hold(xs);
+
+ store the state information into the skb
+ skb->sp = secpath_dup(skb->sp);
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+
+ indicate the success and/or error status of the offload
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = crypto_status;
+
+ hand the packet to napi_gro_receive() as usual
+
+In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+Driver will check packet seq number and update HW ESN state machine if needed.
+
+When the SA is removed by the user, the driver's xdo_dev_state_delete()
+is asked to disable the offload. Later, xdo_dev_state_free() is called
+from a garbage collection routine after all reference counts to the state
+have been removed and any remaining resources can be cleared for the
+offload state. How these are used by the driver will depend on specific
+hardware needs.
+
+As a netdev is set to DOWN the XFRM stack's netdev listener will call
+xdo_dev_state_delete() and xdo_dev_state_free() on any remaining offloaded
+states.
+
+
diff --git a/Documentation/networking/xfrm_proc.txt b/Documentation/networking/xfrm_proc.txt
index d0d8bafa9016..2eae619ab67b 100644
--- a/Documentation/networking/xfrm_proc.txt
+++ b/Documentation/networking/xfrm_proc.txt
@@ -5,13 +5,15 @@ Masahide NAKAMURA <nakam@linux-ipv6.org>
Transformation Statistics
-------------------------
-xfrm_proc is a statistics shown factor dropped by transformation
-for developer.
-It is a counter designed from current transformation source code
-and defined like linux private MIB.
-Inbound statistics
-~~~~~~~~~~~~~~~~~~
+The xfrm_proc code is a set of statistics showing numbers of packets
+dropped by the transformation code and why. These counters are defined
+as part of the linux private MIB. These counters can be viewed in
+/proc/net/xfrm_stat.
+
+
+Inbound errors
+~~~~~~~~~~~~~~
XfrmInError:
All errors which is not matched others
XfrmInBufferError:
@@ -46,6 +48,10 @@ XfrmInPolBlock:
Policy discards
XfrmInPolError:
Policy error
+XfrmAcquireError:
+ State hasn't been fully acquired before use
+XfrmFwdHdrError:
+ Forward routing of a packet is not allowed
Outbound errors
~~~~~~~~~~~~~~~
@@ -72,3 +78,5 @@ XfrmOutPolDead:
Policy is dead
XfrmOutPolError:
Policy error
+XfrmOutStateInvalid:
+ State is invalid, perhaps expired
diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/perf/arm_dsu_pmu.txt
new file mode 100644
index 000000000000..d611e15f5add
--- /dev/null
+++ b/Documentation/perf/arm_dsu_pmu.txt
@@ -0,0 +1,28 @@
+ARM DynamIQ Shared Unit (DSU) PMU
+==================================
+
+ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
+control logic and external interfaces to form a multicore cluster. The PMU
+allows counting the various events related to the L3 cache, Snoop Control Unit
+etc, using 32bit independent counters. It also provides a 64bit cycle counter.
+
+The PMU can only be accessed via CPU system registers and are common to the
+cores connected to the same DSU. Like most of the other uncore PMUs, DSU
+PMU doesn't support process specific events and cannot be used in sampling mode.
+
+The DSU provides a bitmap for a subset of implemented events via hardware
+registers. There is no way for the driver to determine if the other events
+are available or not. Hence the driver exposes only those events advertised
+by the DSU, in "events" directory under :
+
+ /sys/bus/event_sources/devices/arm_dsu_<N>/
+
+The user should refer to the TRM of the product to figure out the supported events
+and use the raw event code for the unlisted events.
+
+The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+
+
+e.g usage :
+
+ perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index 704cd36079b8..8eaf9ee24d43 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -994,6 +994,17 @@ into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(),
the function will set the power.direct_complete flag for it (to make the PM core
skip the subsequent "thaw" callbacks for it) and return.
+Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the
+device to be left in suspend after system-wide transitions to the working state.
+This flag is checked by the PM core, but the PCI bus type informs the PM core
+which devices may be left in suspend from its perspective (that happens during
+the "noirq" phase of system-wide suspend and analogous transitions) and next it
+uses the dev_pm_may_skip_resume() helper to decide whether or not to return from
+pci_pm_resume_noirq() early, as the PM core will skip the remaining resume
+callbacks for the device during the transition under way and will set its
+runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for
+it.
+
3.2. Device Runtime Power Management
------------------------------------
In addition to providing device power management callbacks PCI device drivers
diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt
index 757e3b53dc11..eff4dcaaa252 100644
--- a/Documentation/power/regulator/machine.txt
+++ b/Documentation/power/regulator/machine.txt
@@ -23,16 +23,12 @@ struct regulator_consumer_supply {
e.g. for the machine above
static struct regulator_consumer_supply regulator1_consumers[] = {
-{
- .dev_name = "dev_name(consumer B)",
- .supply = "Vcc",
-},};
+ REGULATOR_SUPPLY("Vcc", "consumer B"),
+};
static struct regulator_consumer_supply regulator2_consumers[] = {
-{
- .dev = "dev_name(consumer A"),
- .supply = "Vcc",
-},};
+ REGULATOR_SUPPLY("Vcc", "consumer A"),
+};
This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2
to the 'Vcc' supply for Consumer A.
@@ -78,20 +74,20 @@ static struct regulator_init_data regulator2_data = {
Finally the regulator devices must be registered in the usual manner.
static struct platform_device regulator_devices[] = {
-{
- .name = "regulator",
- .id = DCDC_1,
- .dev = {
- .platform_data = &regulator1_data,
+ {
+ .name = "regulator",
+ .id = DCDC_1,
+ .dev = {
+ .platform_data = &regulator1_data,
+ },
},
-},
-{
- .name = "regulator",
- .id = DCDC_2,
- .dev = {
- .platform_data = &regulator2_data,
+ {
+ .name = "regulator",
+ .id = DCDC_2,
+ .dev = {
+ .platform_data = &regulator2_data,
+ },
},
-},
};
/* register regulator 1 device */
platform_device_register(&regulator_devices[0]);
diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst
index b3170671a1df..6816c12d6956 100644
--- a/Documentation/process/kernel-enforcement-statement.rst
+++ b/Documentation/process/kernel-enforcement-statement.rst
@@ -68,6 +68,7 @@ we might work for today, have in the past, or will in the future.
- Paul Burton
- Javier Martinez Canillas
- Rob Clark
+ - Kees Cook (Google)
- Jonathan Corbet
- Dennis Dalessandro
- Vivien Didelot (Savoir-faire Linux)
@@ -118,6 +119,7 @@ we might work for today, have in the past, or will in the future.
- Mike Marshall
- Chris Mason
- Paul E. McKenney
+ - Arnaldo Carvalho de Melo
- David S. Miller
- Ingo Molnar
- Kuninori Morimoto
@@ -136,6 +138,7 @@ we might work for today, have in the past, or will in the future.
- Anna Schumaker
- Jes Sorensen
- K.Y. Srinivasan
+ - David Sterba (SUSE)
- Heiko Stuebner
- Jiri Kosina (SUSE)
- Willy Tarreau
@@ -143,6 +146,7 @@ we might work for today, have in the past, or will in the future.
- Linus Torvalds
- Thierry Reding
- Rik van Riel
+ - Luis R. Rodriguez
- Geert Uytterhoeven (Glider bvba)
- Eduardo Valentin (Amazon.com)
- Daniel Vetter
diff --git a/Documentation/process/license-rules.rst b/Documentation/process/license-rules.rst
new file mode 100644
index 000000000000..408f77dc6157
--- /dev/null
+++ b/Documentation/process/license-rules.rst
@@ -0,0 +1,370 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Linux kernel licensing rules
+============================
+
+The Linux Kernel is provided under the terms of the GNU General Public
+License version 2 only (GPL-2.0), as published by the Free Software
+Foundation, and provided in the COPYING file. This documentation file is
+not meant to replace the COPYING file, but provides a description of how
+each source file should be annotated to make the licensing it is governed
+under clear and unambiguous.
+
+The license in the COPYING file applies to the kernel source as a whole,
+though individual source files can have a different license which is
+required to be compatible with the GPL-2.0::
+
+ GPL-1.0+ : GNU General Public License v1.0 or later
+ GPL-2.0+ : GNU General Public License v2.0 or later
+ LGPL-2.0 : GNU Library General Public License v2 only
+ LGPL-2.0+ : GNU Library General Public License v2 or later
+ LGPL-2.1 : GNU Lesser General Public License v2.1 only
+ LGPL-2.1+ : GNU Lesser General Public License v2.1 or later
+
+Aside from that, individual files can be provided under a dual license,
+e.g. one of the compatible GPL variants and alternatively under a
+permissive license like BSD, MIT etc.
+
+The User-space API (UAPI) header files, which describe the interface of
+user-space programs to the kernel are a special case. According to the
+note in the kernel COPYING file, the syscall interface is a clear boundary,
+which does not extend the GPL requirements to any software which uses it to
+communicate with the kernel. Because the UAPI headers must be includable
+into any source files which create an executable running on the Linux
+kernel, the exception must be documented by a special license expression.
+
+The common way of expressing the license of a source file is to add the
+matching boilerplate text into the top comment of the file. Due to
+formatting, typos etc. these "boilerplates" are hard to validate for
+tools which are used in the context of license compliance.
+
+An alternative to boilerplate text is the use of Software Package Data
+Exchange (SPDX) license identifiers in each source file. SPDX license
+identifiers are machine parsable and precise shorthands for the license
+under which the content of the file is contributed. SPDX license
+identifiers are managed by the SPDX Workgroup at the Linux Foundation and
+have been agreed on by partners throughout the industry, tool vendors, and
+legal teams. For further information see https://spdx.org/
+
+The Linux kernel requires the precise SPDX identifier in all source files.
+The valid identifiers used in the kernel are explained in the section
+`License identifiers`_ and have been retrieved from the official SPDX
+license list at https://spdx.org/licenses/ along with the license texts.
+
+License identifier syntax
+-------------------------
+
+1. Placement:
+
+ The SPDX license identifier in kernel files shall be added at the first
+ possible line in a file which can contain a comment. For the majority
+ or files this is the first line, except for scripts which require the
+ '#!PATH_TO_INTERPRETER' in the first line. For those scripts the SPDX
+ identifier goes into the second line.
+
+|
+
+2. Style:
+
+ The SPDX license identifier is added in form of a comment. The comment
+ style depends on the file type::
+
+ C source: // SPDX-License-Identifier: <SPDX License Expression>
+ C header: /* SPDX-License-Identifier: <SPDX License Expression> */
+ ASM: /* SPDX-License-Identifier: <SPDX License Expression> */
+ scripts: # SPDX-License-Identifier: <SPDX License Expression>
+ .rst: .. SPDX-License-Identifier: <SPDX License Expression>
+ .dts{i}: // SPDX-License-Identifier: <SPDX License Expression>
+
+ If a specific tool cannot handle the standard comment style, then the
+ appropriate comment mechanism which the tool accepts shall be used. This
+ is the reason for having the "/\* \*/" style comment in C header
+ files. There was build breakage observed with generated .lds files where
+ 'ld' failed to parse the C++ comment. This has been fixed by now, but
+ there are still older assembler tools which cannot handle C++ style
+ comments.
+
+|
+
+3. Syntax:
+
+ A <SPDX License Expression> is either an SPDX short form license
+ identifier found on the SPDX License List, or the combination of two
+ SPDX short form license identifiers separated by "WITH" when a license
+ exception applies. When multiple licenses apply, an expression consists
+ of keywords "AND", "OR" separating sub-expressions and surrounded by
+ "(", ")" .
+
+ License identifiers for licenses like [L]GPL with the 'or later' option
+ are constructed by using a "+" for indicating the 'or later' option.::
+
+ // SPDX-License-Identifier: GPL-2.0+
+ // SPDX-License-Identifier: LGPL-2.1+
+
+ WITH should be used when there is a modifier to a license needed.
+ For example, the linux kernel UAPI files use the expression::
+
+ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ // SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note
+
+ Other examples using WITH exceptions found in the kernel are::
+
+ // SPDX-License-Identifier: GPL-2.0 WITH mif-exception
+ // SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
+
+ Exceptions can only be used with particular License identifiers. The
+ valid License identifiers are listed in the tags of the exception text
+ file. For details see the point `Exceptions`_ in the chapter `License
+ identifiers`_.
+
+ OR should be used if the file is dual licensed and only one license is
+ to be selected. For example, some dtsi files are available under dual
+ licenses::
+
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+ Examples from the kernel for license expressions in dual licensed files::
+
+ // SPDX-License-Identifier: GPL-2.0 OR MIT
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+ // SPDX-License-Identifier: GPL-2.0 OR Apache-2.0
+ // SPDX-License-Identifier: GPL-2.0 OR MPL-1.1
+ // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT
+ // SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause OR OpenSSL
+
+ AND should be used if the file has multiple licenses whose terms all
+ apply to use the file. For example, if code is inherited from another
+ project and permission has been given to put it in the kernel, but the
+ original license terms need to remain in effect::
+
+ // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) AND MIT
+
+ Another other example where both sets of license terms need to be
+ adhered to is::
+
+ // SPDX-License-Identifier: GPL-1.0+ AND LGPL-2.1+
+
+License identifiers
+-------------------
+
+The licenses currently used, as well as the licenses for code added to the
+kernel, can be broken down into:
+
+1. _`Preferred licenses`:
+
+ Whenever possible these licenses should be used as they are known to be
+ fully compatible and widely used. These licenses are available from the
+ directory::
+
+ LICENSES/preferred/
+
+ in the kernel source tree.
+
+ The files in this directory contain the full license text and
+ `Metatags`_. The file names are identical to the SPDX license
+ identifier which shall be used for the license in source files.
+
+ Examples::
+
+ LICENSES/preferred/GPL-2.0
+
+ Contains the GPL version 2 license text and the required metatags::
+
+ LICENSES/preferred/MIT
+
+ Contains the MIT license text and the required metatags
+
+ _`Metatags`:
+
+ The following meta tags must be available in a license file:
+
+ - Valid-License-Identifier:
+
+ One or more lines which declare which License Identifiers are valid
+ inside the project to reference this particular license text. Usually
+ this is a single valid identifier, but e.g. for licenses with the 'or
+ later' options two identifiers are valid.
+
+ - SPDX-URL:
+
+ The URL of the SPDX page which contains additional information related
+ to the license.
+
+ - Usage-Guidance:
+
+ Freeform text for usage advice. The text must include correct examples
+ for the SPDX license identifiers as they should be put into source
+ files according to the `License identifier syntax`_ guidelines.
+
+ - License-Text:
+
+ All text after this tag is treated as the original license text
+
+ File format examples::
+
+ Valid-License-Identifier: GPL-2.0
+ Valid-License-Identifier: GPL-2.0+
+ SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
+ Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU General Public License (GPL) version 2 only' use:
+ SPDX-License-Identifier: GPL-2.0
+ For 'GNU General Public License (GPL) version 2 or any later version' use:
+ SPDX-License-Identifier: GPL-2.0+
+ License-Text:
+ Full license text
+
+ ::
+
+ SPDX-License-Identifier: MIT
+ SPDX-URL: https://spdx.org/licenses/MIT.html
+ Usage-Guide:
+ To use this license in source code, put the following SPDX
+ tag/value pair into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ SPDX-License-Identifier: MIT
+ License-Text:
+ Full license text
+
+|
+
+2. Not recommended licenses:
+
+ These licenses should only be used for existing code or for importing
+ code from a different project. These licenses are available from the
+ directory::
+
+ LICENSES/other/
+
+ in the kernel source tree.
+
+ The files in this directory contain the full license text and
+ `Metatags`_. The file names are identical to the SPDX license
+ identifier which shall be used for the license in source files.
+
+ Examples::
+
+ LICENSES/other/ISC
+
+ Contains the Internet Systems Consortium license text and the required
+ metatags::
+
+ LICENSES/other/ZLib
+
+ Contains the ZLIB license text and the required metatags.
+
+ Metatags:
+
+ The metatag requirements for 'other' licenses are identical to the
+ requirements of the `Preferred licenses`_.
+
+ File format example::
+
+ Valid-License-Identifier: ISC
+ SPDX-URL: https://spdx.org/licenses/ISC.html
+ Usage-Guide:
+ Usage of this license in the kernel for new code is discouraged
+ and it should solely be used for importing code from an already
+ existing project.
+ To use this license in source code, put the following SPDX
+ tag/value pair into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ SPDX-License-Identifier: ISC
+ License-Text:
+ Full license text
+
+|
+
+3. _`Exceptions`:
+
+ Some licenses can be amended with exceptions which grant certain rights
+ which the original license does not. These exceptions are available
+ from the directory::
+
+ LICENSES/exceptions/
+
+ in the kernel source tree. The files in this directory contain the full
+ exception text and the required `Exception Metatags`_.
+
+ Examples::
+
+ LICENSES/exceptions/Linux-syscall-note
+
+ Contains the Linux syscall exception as documented in the COPYING
+ file of the Linux kernel, which is used for UAPI header files.
+ e.g. /\* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note \*/::
+
+ LICENSES/exceptions/GCC-exception-2.0
+
+ Contains the GCC 'linking exception' which allows to link any binary
+ independent of its license against the compiled version of a file marked
+ with this exception. This is required for creating runnable executables
+ from source code which is not compatible with the GPL.
+
+ _`Exception Metatags`:
+
+ The following meta tags must be available in an exception file:
+
+ - SPDX-Exception-Identifier:
+
+ One exception identifier which can be used with SPDX license
+ identifiers.
+
+ - SPDX-URL:
+
+ The URL of the SPDX page which contains additional information related
+ to the exception.
+
+ - SPDX-Licenses:
+
+ A comma separated list of SPDX license identifiers for which the
+ exception can be used.
+
+ - Usage-Guidance:
+
+ Freeform text for usage advice. The text must be followed by correct
+ examples for the SPDX license identifiers as they should be put into
+ source files according to the `License identifier syntax`_ guidelines.
+
+ - Exception-Text:
+
+ All text after this tag is treated as the original exception text
+
+ File format examples::
+
+ SPDX-Exception-Identifier: Linux-syscall-note
+ SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html
+ SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+
+ Usage-Guidance:
+ This exception is used together with one of the above SPDX-Licenses
+ to mark user-space API (uapi) header files so they can be included
+ into non GPL compliant user-space application code.
+ To use this exception add it with the keyword WITH to one of the
+ identifiers in the SPDX-Licenses tag:
+ SPDX-License-Identifier: <SPDX-License> WITH Linux-syscall-note
+ Exception-Text:
+ Full exception text
+
+ ::
+
+ SPDX-Exception-Identifier: GCC-exception-2.0
+ SPDX-URL: https://spdx.org/licenses/GCC-exception-2.0.html
+ SPDX-Licenses: GPL-2.0, GPL-2.0+
+ Usage-Guidance:
+ The "GCC Runtime Library exception 2.0" is used together with one
+ of the above SPDX-Licenses for code imported from the GCC runtime
+ library.
+ To use this exception add it with the keyword WITH to one of the
+ identifiers in the SPDX-Licenses tag:
+ SPDX-License-Identifier: <SPDX-License> WITH GCC-exception-2.0
+ Exception-Text:
+ Full exception text
+
+
+All SPDX license identifiers and exceptions must have a corresponding file
+in the LICENSE subdirectories. This is required to allow tool
+verification (e.g. checkpatch.pl) and to have the licenses ready to read
+and extract right from the source, which is recommended by various FOSS
+organizations, e.g. the `FSFE REUSE initiative <https://reuse.software/>`_.
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index a0d9d34bfb6d..367353c54949 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -37,7 +37,9 @@ and elsewhere regarding submitting Linux kernel patches.
You should be able to justify all violations that remain in
your patch.
-6) Any new or modified ``CONFIG`` options don't muck up the config menu.
+6) Any new or modified ``CONFIG`` options do not muck up the config menu and
+ default to off unless they meet the exception criteria documented in
+ ``Documentation/kbuild/kconfig-language.txt`` Menu attributes: default value.
7) All new ``Kconfig`` options have help text.
diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst
index 66a2e24939d8..5bb7125faeee 100644
--- a/Documentation/security/credentials.rst
+++ b/Documentation/security/credentials.rst
@@ -451,6 +451,13 @@ checks and hooks done. Both the current and the proposed sets of credentials
are available for this purpose as current_cred() will return the current set
still at this point.
+When replacing the group list, the new list must be sorted before it
+is added to the credential, as a binary search is used to test for
+membership. In practice, this means :c:func:`groups_sort` should be
+called before :c:func:`set_groups` or :c:func:`set_current_groups`.
+:c:func:`groups_sort)` must not be called on a ``struct group_list`` which
+is shared as it may permute elements as part of the sorting process
+even if the array is already sorted.
When the credential set is ready, it should be committed to the current process
by calling::
diff --git a/Documentation/security/self-protection.rst b/Documentation/security/self-protection.rst
index 60c8bd8b77bf..0f53826c78b9 100644
--- a/Documentation/security/self-protection.rst
+++ b/Documentation/security/self-protection.rst
@@ -270,6 +270,21 @@ attacks, it is important to defend against exposure of both kernel memory
addresses and kernel memory contents (since they may contain kernel
addresses or other sensitive things like canary values).
+Kernel addresses
+----------------
+
+Printing kernel addresses to userspace leaks sensitive information about
+the kernel memory layout. Care should be exercised when using any printk
+specifier that prints the raw address, currently %px, %p[ad], (and %p[sSb]
+in certain circumstances [*]). Any file written to using one of these
+specifiers should be readable only by privileged processes.
+
+Kernels 4.14 and older printed the raw address using %p. As of 4.15-rc1
+addresses printed with the specifier %p are hashed before printing.
+
+[*] If KALLSYMS is enabled and symbol lookup fails, the raw address is
+printed. If KALLSYMS is not enabled the raw address is printed.
+
Unique identifiers
------------------
diff --git a/Documentation/sparc/oradax/dax-hv-api.txt b/Documentation/sparc/oradax/dax-hv-api.txt
new file mode 100644
index 000000000000..73e8d506cf64
--- /dev/null
+++ b/Documentation/sparc/oradax/dax-hv-api.txt
@@ -0,0 +1,1433 @@
+Excerpt from UltraSPARC Virtual Machine Specification
+Compiled from version 3.0.20+15
+Publication date 2017-09-25 08:21
+Copyright © 2008, 2015 Oracle and/or its affiliates. All rights reserved.
+Extracted via "pdftotext -f 547 -l 572 -layout sun4v_20170925.pdf"
+Authors:
+ Charles Kunzman
+ Sam Glidden
+ Mark Cianchetti
+
+
+Chapter 36. Coprocessor services
+ The following APIs provide access via the Hypervisor to hardware assisted data processing functionality.
+ These APIs may only be provided by certain platforms, and may not be available to all virtual machines
+ even on supported platforms. Restrictions on the use of these APIs may be imposed in order to support
+ live-migration and other system management activities.
+
+36.1. Data Analytics Accelerator
+ The Data Analytics Accelerator (DAX) functionality is a collection of hardware coprocessors that provide
+ high speed processoring of database-centric operations. The coprocessors may support one or more of
+ the following data query operations: search, extraction, compression, decompression, and translation. The
+ functionality offered may vary by virtual machine implementation.
+
+ The DAX is a virtual device to sun4v guests, with supported data operations indicated by the virtual device
+ compatibilty property. Functionality is accessed through the submission of Command Control Blocks
+ (CCBs) via the ccb_submit API function. The operations are processed asynchronously, with the status
+ of the submitted operations reported through a Completion Area linked to each CCB. Each CCB has a
+ separate Completion Area and, unless execution order is specifically restricted through the use of serial-
+ conditional flags, the execution order of submitted CCBs is arbitrary. Likewise, the time to completion
+ for a given CCB is never guaranteed.
+
+ Guest software may implement a software timeout on CCB operations, and if the timeout is exceeded, the
+ operation may be cancelled or killed via the ccb_kill API function. It is recommended for guest software
+ to implement a software timeout to account for certain RAS errors which may result in lost CCBs. It is
+ recommended such implementation use the ccb_info API function to check the status of a CCB prior to
+ killing it in order to determine if the CCB is still in queue, or may have been lost due to a RAS error.
+
+ There is no fixed limit on the number of outstanding CCBs guest software may have queued in the virtual
+ machine, however, internal resource limitations within the virtual machine can cause CCB submissions
+ to be temporarily rejected with EWOULDBLOCK. In such cases, guests should continue to attempt
+ submissions until they succeed; waiting for an outstanding CCB to complete is not necessary, and would
+ not be a guarantee that a future submission would succeed.
+
+ The availablility of DAX coprocessor command service is indicated by the presence of the DAX virtual
+ device node in the guest MD (Section 8.24.17, “Database Analytics Accelerators (DAX) virtual-device
+ node”).
+
+36.1.1. DAX Compatibility Property
+ The query functionality may vary based on the compatibility property of the virtual device:
+
+36.1.1.1. "ORCL,sun4v-dax" Device Compatibility
+ Available CCB commands:
+
+ • No-op/Sync
+
+ • Extract
+
+ • Scan Value
+
+ • Inverted Scan Value
+
+ • Scan Range
+
+
+ 509
+ Coprocessor services
+
+
+ • Inverted Scan Range
+
+ • Translate
+
+ • Inverted Translate
+
+ • Select
+
+ See Section 36.2.1, “Query CCB Command Formats” for the corresponding CCB input and output formats.
+
+ Only version 0 CCBs are available.
+
+36.1.1.2. "ORCL,sun4v-dax-fc" Device Compatibility
+ "ORCL,sun4v-dax-fc" is compatible with the "ORCL,sun4v-dax" interface, and includes additional CCB
+ bit fields and controls.
+
+36.1.1.3. "ORCL,sun4v-dax2" Device Compatibility
+ Available CCB commands:
+
+ • No-op/Sync
+
+ • Extract
+
+ • Scan Value
+
+ • Inverted Scan Value
+
+ • Scan Range
+
+ • Inverted Scan Range
+
+ • Translate
+
+ • Inverted Translate
+
+ • Select
+
+ See Section 36.2.1, “Query CCB Command Formats” for the corresponding CCB input and output formats.
+
+ Version 0 and 1 CCBs are available. Only version 0 CCBs may use Huffman encoded data, whereas only
+ version 1 CCBs may use OZIP.
+
+36.1.2. DAX Virtual Device Interrupts
+ The DAX virtual device has multiple interrupts associated with it which may be used by the guest if
+ desired. The number of device interrupts available to the guest is indicated in the virtual device node of the
+ guest MD (Section 8.24.17, “Database Analytics Accelerators (DAX) virtual-device node”). If the device
+ node indicates N interrupts available, the guest may use any value from 0 to N - 1 (inclusive) in a CCB
+ interrupt number field. Using values outside this range will result in the CCB being rejected for an invalid
+ field value.
+
+ The interrupts may be bound and managed using the standard sun4v device interrupts API (Chapter 16,
+ Device interrupt services). Sysino interrupts are not available for DAX devices.
+
+36.2. Coprocessor Control Block (CCB)
+ CCBs are either 64 or 128 bytes long, depending on the operation type. The exact contents of the CCB
+ are command specific, but all CCBs contain at least one memory buffer address. All memory locations
+
+
+ 510
+ Coprocessor services
+
+
+referenced by a CCB must be pinned in memory until the CCB either completes execution or is killed
+via the ccb_kill API call. Changes in virtual address mappings occurring after CCB submission are not
+guaranteed to be visible, and as such all virtual address updates need to be synchronized with CCB
+execution.
+
+All CCBs begin with a common 32-bit header.
+
+Table 36.1. CCB Header Format
+Bits Field Description
+[31:28] CCB version. For API version 2.0: set to 1 if CCB uses OZIP encoding; set to 0 if the CCB
+ uses Huffman encoding; otherwise either 0 or 1. For API version 1.0: always set to 0.
+[27] When API version 2.0 is negotiated, this is the Pipeline Flag [512]. It is reserved in
+ API version 1.0
+[26] Long CCB flag [512]
+[25] Conditional synchronization flag [512]
+[24] Serial synchronization flag
+[23:16] CCB operation code:
+ 0x00 No Operation (No-op) or Sync
+ 0x01 Extract
+ 0x02 Scan Value
+ 0x12 Inverted Scan Value
+ 0x03 Scan Range
+ 0x13 Inverted Scan Range
+ 0x04 Translate
+ 0x14 Inverted Translate
+ 0x05 Select
+[15:13] Reserved
+[12:11] Table address type
+ 0b'00 No address
+ 0b'01 Alternate context virtual address
+ 0b'10 Real address
+ 0b'11 Primary context virtual address
+[10:8] Output/Destination address type
+ 0b'000 No address
+ 0b'001 Alternate context virtual address
+ 0b'010 Real address
+ 0b'011 Primary context virtual address
+ 0b'100 Reserved
+ 0b'101 Reserved
+ 0b'110 Reserved
+ 0b'111 Reserved
+[7:5] Secondary source address type
+
+
+ 511
+ Coprocessor services
+
+
+Bits Field Description
+ 0b'000 No address
+ 0b'001 Alternate context virtual address
+ 0b'010 Real address
+ 0b'011 Primary context virtual address
+ 0b'100 Reserved
+ 0b'101 Reserved
+ 0b'110 Reserved
+ 0b'111 Reserved
+[4:2] Primary source address type
+ 0b'000 No address
+ 0b'001 Alternate context virtual address
+ 0b'010 Real address
+ 0b'011 Primary context virtual address
+ 0b'100 Reserved
+ 0b'101 Reserved
+ 0b'110 Reserved
+ 0b'111 Reserved
+[1:0] Completion area address type
+ 0b'00 No address
+ 0b'01 Alternate context virtual address
+ 0b'10 Real address
+ 0b'11 Primary context virtual address
+
+The Long CCB flag indicates whether the submitted CCB is 64 or 128 bytes long; value is 0 for 64 bytes
+and 1 for 128 bytes.
+
+The Serial and Conditional flags allow simple relative ordering between CCBs. Any CCB with the Serial
+flag set will execute sequentially relative to any previous CCB that is also marked as Serial in the same
+CCB submission. CCBs without the Serial flag set execute independently, even if they are between CCBs
+with the Serial flag set. CCBs marked solely with the Serial flag will execute upon the completion of the
+previous Serial CCB, regardless of the completion status of that CCB. The Conditional flag allows CCBs
+to conditionally execute based on the successful execution of the closest CCB marked with the Serial flag.
+A CCB may only be conditional on exactly one CCB, however, a CCB may be marked both Conditional
+and Serial to allow execution chaining. The flags do NOT allow fan-out chaining, where multiple CCBs
+execute in parallel based on the completion of another CCB.
+
+The Pipeline flag is an optimization that directs the output of one CCB (the "source" CCB) directly to
+the input of the next CCB (the "target" CCB). The target CCB thus does not need to read the input from
+memory. The Pipeline flag is advisory and may be dropped.
+
+Both the Pipeline and Serial bits must be set in the source CCB. The Conditional bit must be set in the
+target CCB. Exactly one CCB must be made conditional on the source CCB; either 0 or 2 target CCBs
+is invalid. However, Pipelines can be extended beyond two CCBs: the sequence would start with a CCB
+with both the Pipeline and Serial bits set, proceed through CCBs with the Pipeline, Serial, and Conditional
+bits set, and terminate at a CCB that has the Conditional bit set, but not the Pipeline bit.
+
+
+ 512
+ Coprocessor services
+
+
+ The input of the target CCB must start within 64 bytes of the output of the source CCB or the pipeline flag
+ will be ignored. All CCBs in a pipeline must be submitted in the same call to ccb_submit.
+
+ The various address type fields indicate how the various address values used in the CCB should be
+ interpreted by the virtual machine. Not all of the types specified are used by every CCB format. Types
+ which are not applicable to the given CCB command should be indicated as type 0 (No address). Virtual
+ addresses used in the CCB must have translation entries present in either the TLB or a configured TSB
+ for the submitting virtual processor. Virtual addresses which cannot be translated by the virtual machine
+ will result in the CCB submission being rejected, with the causal virtual address indicated. The CCB
+ may be resubmitted after inserting the translation, or the address may be translated by guest software and
+ resubmitted using the real address translation.
+
+36.2.1. Query CCB Command Formats
+36.2.1.1. Supported Data Formats, Elements Sizes and Offsets
+ Data for query commands may be encoded in multiple possible formats. The data query commands use a
+ common set of values to indicate the encoding formats of the data being processed. Some encoding formats
+ require multiple data streams for processing, requiring the specification of both primary data formats (the
+ encoded data) and secondary data streams (meta-data for the encoded data).
+
+36.2.1.1.1. Primary Input Format
+
+ The primary input format code is a 4-bit field when it is used. There are 10 primary input formats available.
+ The packed formats are not endian neutral. Code values not listed below are reserved.
+
+ Code Format Description
+ 0x0 Fixed width byte packed Up to 16 bytes
+ 0x1 Fixed width bit packed Up to 15 bits (CCB version 0) or 23 bits (CCB version
+ 1); bits are read most significant bit to least significant bit
+ within a byte
+ 0x2 Variable width byte packed Data stream of lengths must be provided as a secondary
+ input
+ 0x4 Fixed width byte packed with run Up to 16 bytes; data stream of run lengths must be
+ length encoding provided as a secondary input
+ 0x5 Fixed width bit packed with run Up to 15 bits (CCB version 0) or 23 bits (CCB version
+ length encoding 1); bits are read most significant bit to least significant bit
+ within a byte; data stream of run lengths must be provided
+ as a secondary input
+ 0x8 Fixed width byte packed with Up to 16 bytes before the encoding; compressed stream
+ Huffman (CCB version 0) or bits are read most significant bit to least significant bit
+ OZIP (CCB version 1) encoding within a byte; pointer to the encoding table must be
+ provided
+ 0x9 Fixed width bit packed with Up to 15 bits (CCB version 0) or 23 bits (CCB version
+ Huffman (CCB version 0) or 1); compressed stream bits are read most significant bit to
+ OZIP (CCB version 1) encoding least significant bit within a byte; pointer to the encoding
+ table must be provided
+ 0xA Variable width byte packed with Up to 16 bytes before the encoding; compressed stream
+ Huffman (CCB version 0) or bits are read most significant bit to least significant bit
+ OZIP (CCB version 1) encoding within a byte; data stream of lengths must be provided as
+ a secondary input; pointer to the encoding table must be
+ provided
+
+
+ 513
+ Coprocessor services
+
+
+ Code Format Description
+ 0xC Fixed width byte packed with Up to 16 bytes before the encoding; compressed stream
+ run length encoding, followed by bits are read most significant bit to least significant bit
+ Huffman (CCB version 0) or within a byte; data stream of run lengths must be provided
+ OZIP (CCB version 1) encoding as a secondary input; pointer to the encoding table must
+ be provided
+ 0xD Fixed width bit packed with Up to 15 bits (CCB version 0) or 23 bits(CCB version 1)
+ run length encoding, followed by before the encoding; compressed stream bits are read most
+ Huffman (CCB version 0) or significant bit to least significant bit within a byte; data
+ OZIP (CCB version 1) encoding stream of run lengths must be provided as a secondary
+ input; pointer to the encoding table must be provided
+
+ If OZIP encoding is used, there must be no reserved bytes in the table.
+
+36.2.1.1.2. Primary Input Element Size
+
+ For primary input data streams with fixed size elements, the element size must be indicated in the CCB
+ command. The size is encoded as the number of bits or bytes, minus one. The valid value range for this
+ field depends on the input format selected, as listed in the table above.
+
+36.2.1.1.3. Secondary Input Format
+
+ For primary input data streams which require a secondary input stream, the secondary input stream is
+ always encoded in a fixed width, bit-packed format. The bits are read from most significant bit to least
+ significant bit within a byte. There are two encoding options for the secondary input stream data elements,
+ depending on whether the value of 0 is needed:
+
+ Secondary Input Description
+ Format Code
+ 0 Element is stored as value minus 1 (0 evalutes to 1, 1 evalutes
+ to 2, etc)
+ 1 Element is stored as value
+
+36.2.1.1.4. Secondary Input Element Size
+
+ Secondary input element size is encoded as a two bit field:
+
+ Secondary Input Size Description
+ Code
+ 0x0 1 bit
+ 0x1 2 bits
+ 0x2 4 bits
+ 0x3 8 bits
+
+36.2.1.1.5. Input Element Offsets
+
+ Bit-wise input data streams may have any alignment within the base addressed byte. The offset, specified
+ from most significant bit to least significant bit, is provided as a fixed 3 bit field for each input type. A
+ value of 0 indicates that the first input element begins at the most significant bit in the first byte, and a
+ value of 7 indicates it begins with the least significant bit.
+
+ This field should be zero for any byte-wise primary input data streams.
+
+
+ 514
+ Coprocessor services
+
+
+36.2.1.1.6. Output Format
+
+ Query commands support multiple sizes and encodings for output data streams. There are four possible
+ output encodings, and up to four supported element sizes per encoding. Not all output encodings are
+ supported for every command. The format is indicated by a 4-bit field in the CCB:
+
+ Output Format Code Description
+ 0x0 Byte aligned, 1 byte elements
+ 0x1 Byte aligned, 2 byte elements
+ 0x2 Byte aligned, 4 byte elements
+ 0x3 Byte aligned, 8 byte elements
+ 0x4 16 byte aligned, 16 byte elements
+ 0x5 Reserved
+ 0x6 Reserved
+ 0x7 Reserved
+ 0x8 Packed vector of single bit elements
+ 0x9 Reserved
+ 0xA Reserved
+ 0xB Reserved
+ 0xC Reserved
+ 0xD 2 byte elements where each element is the index value of a bit,
+ from an bit vector, which was 1.
+ 0xE 4 byte elements where each element is the index value of a bit,
+ from an bit vector, which was 1.
+ 0xF Reserved
+
+36.2.1.1.7. Application Data Integrity (ADI)
+
+ On platforms which support ADI, the ADI version number may be specified for each separate memory
+ access type used in the CCB command. ADI checking only occurs when reading data. When writing data,
+ the specified ADI version number overwrites any existing ADI value in memory.
+
+ An ADI version value of 0 or 0xF indicates the ADI checking is disabled for that data access, even if it is
+ enabled in memory. By setting the appropriate flag in CCB_SUBMIT (Section 36.3.1, “ccb_submit”) it is
+ also an option to disable ADI checking for all inputs accessed via virtual address for all CCBs submitted
+ during that hypercall invocation.
+
+ The ADI value is only guaranteed to be checked on the first 64 bytes of each data access. Mismatches on
+ subsequent data chunks may not be detected, so guest software should be careful to use page size checking
+ to protect against buffer overruns.
+
+36.2.1.1.8. Page size checking
+
+ All data accesses used in CCB commands must be bounded within a single memory page. When addresses
+ are provided using a virtual address, the page size for checking is extracted from the TTE for that virtual
+ address. When using real addresses, the guest must supply the page size in the same field as the address
+ value. The page size must be one of the sizes supported by the underlying virtual machine. Using a value
+ that is not supported may result in the CCB submission being rejected or the generation of a CCB parsing
+ error in the completion area.
+
+
+ 515
+ Coprocessor services
+
+
+36.2.1.2. Extract command
+
+ Converts an input vector in one format to an output vector in another format. All input format types are
+ supported.
+
+ The only supported output format is a padded, byte-aligned output stream, using output codes 0x0 - 0x4.
+ When the specified output element size is larger than the extracted input element size, zeros are padded to
+ the extracted input element. First, if the decompressed input size is not a whole number of bytes, 0 bits are
+ padded to the most significant bit side till the next byte boundary. Next, if the output element size is larger
+ than the byte padded input element, bytes of value 0 are added based on the Padding Direction bit in the
+ CCB. If the output element size is smaller than the byte-padded input element size, the input element is
+ truncated by dropped from the least significant byte side until the selected output size is reached.
+
+ The return value of the CCB completion area is invalid. The “number of elements processed” field in the
+ CCB completion area will be valid.
+
+ The extract CCB is a 64-byte “short format” CCB.
+
+ The extract CCB command format can be specified by the following packed C structure for a big-endian
+ machine:
+
+
+ struct extract_ccb {
+ uint32_t header;
+ uint32_t control;
+ uint64_t completion;
+ uint64_t primary_input;
+ uint64_t data_access_control;
+ uint64_t secondary_input;
+ uint64_t reserved;
+ uint64_t output;
+ uint64_t table;
+ };
+
+
+ The exact field offsets, sizes, and composition are as follows:
+
+ Offset Size Field Description
+ 0 4 CCB header (Table 36.1, “CCB Header Format”)
+ 4 4 Command control
+ Bits Field Description
+ [31:28] Primary Input Format (see Section 36.2.1.1.1, “Primary Input
+ Format”)
+ [27:23] Primary Input Element Size (see Section 36.2.1.1.2, “Primary
+ Input Element Size”)
+ [22:20] Primary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [19] Secondary Input Format (see Section 36.2.1.1.3, “Secondary
+ Input Format”)
+ [18:16] Secondary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+
+
+ 516
+ Coprocessor services
+
+
+Offset Size Field Description
+ Bits Field Description
+ [15:14] Secondary Input Element Size (see Section 36.2.1.1.4,
+ “Secondary Input Element Size”
+ [13:10] Output Format (see Section 36.2.1.1.6, “Output Format”)
+ [9] Padding Direction selector: A value of 1 causes padding bytes
+ to be added to the left side of output elements. A value of 0
+ causes padding bytes to be added to the right side of output
+ elements.
+ [8:0] Reserved
+8 8 Completion
+ Bits Field Description
+ [63:60] ADI version (see Section 36.2.1.1.7, “Application Data
+ Integrity (ADI)”)
+ [59] If set to 1, a virtual device interrupt will be generated using
+ the device interrupt number specified in the lower bits of this
+ completion word. If 0, the lower bits of this completion word
+ are ignored.
+ [58:6] Completion area address bits [58:6]. Address type is
+ determined by CCB header.
+ [5:0] Virtual device interrupt number for completion interrupt, if
+ enabled.
+16 8 Primary Input
+ Bits Field Description
+ [63:60] ADI version (see Section 36.2.1.1.7, “Application Data
+ Integrity (ADI)”)
+ [59:56] If using real address, these bits should be filled in with the
+ page size code for the page boundary checking the guest wants
+ the virtual machine to use when accessing this data stream
+ (checking is only guaranteed to be performed when using API
+ version 1.1 and later). If using a virtual address, this field will
+ be used as as primary input address bits [59:56].
+ [55:0] Primary input address bits [55:0]. Address type is determined
+ by CCB header.
+24 8 Data Access Control
+ Bits Field Description
+ [63:62] Flow Control
+ Value Description
+ 0b'00 Disable flow control
+ 0b'01 Enable flow control (only valid with "ORCL,sun4v-
+ dax-fc" compatible virtual device variants)
+ 0b'10 Reserved
+ 0b'11 Reserved
+ [61:60] Reserved (API 1.0)
+
+
+ 517
+ Coprocessor services
+
+
+Offset Size Field Description
+ Bits Field Description
+ Pipeline target (API 2.0)
+ Value Description
+ 0b'00 Connect to primary input
+ 0b'01 Connect to secondary input
+ 0b'10 Reserved
+ 0b'11 Reserved
+ [59:40] Output buffer size given in units of 64 bytes, minus 1. Value of
+ 0 means 64 bytes, value of 1 means 128 bytes, etc. Buffer size is
+ only enforced if flow control is enabled in Flow Control field.
+ [39:32] Reserved
+ [31:30] Output Data Cache Allocation
+ Value Description
+ 0b'00 Do not allocate cache lines for output data stream.
+ 0b'01 Force cache lines for output data stream to be
+ allocated in the cache that is local to the submitting
+ virtual cpu.
+ 0b'10 Allocate cache lines for output data stream, but allow
+ existing cache lines associated with the data to remain
+ in their current cache instance. Any memory not
+ already in cache will be allocated in the cache local
+ to the submitting virtual cpu.
+ 0b'11 Reserved
+ [29:26] Reserved
+ [25:24] Primary Input Length Format
+ Value Description
+ 0b'00 Number of primary symbols
+ 0b'01 Number of primary bytes
+ 0b'10 Number of primary bits
+ 0b'11 Reserved
+ [23:0] Primary Input Length
+ Format Field Value
+ # of primary symbols Number of input elements to process,
+ minus 1. Command execution stops
+ once count is reached.
+ # of primary bytes Number of input bytes to process,
+ minus 1. Command execution stops
+ once count is reached. The count is
+ done before any decompression or
+ decoding.
+ # of primary bits Number of input bits to process,
+ minus 1. Command execution stops
+
+
+
+ 518
+ Coprocessor services
+
+
+ Offset Size Field Description
+ Bits Field Description
+ Format Field Value
+ once count is reached. The count is
+ done before any decompression or
+ decoding, and does not include any
+ bits skipped by the Primary Input
+ Offset field value of the command
+ control word.
+ 32 8 Secondary Input, if used by Primary Input Format. Same fields as Primary
+ Input.
+ 40 8 Reserved
+ 48 8 Output (same fields as Primary Input)
+ 56 8 Symbol Table (if used by Primary Input)
+ Bits Field Description
+ [63:60] ADI version (see Section 36.2.1.1.7, “Application Data
+ Integrity (ADI)”)
+ [59:56] If using real address, these bits should be filled in with the
+ page size code for the page boundary checking the guest wants
+ the virtual machine to use when accessing this data stream
+ (checking is only guaranteed to be performed when using API
+ version 1.1 and later). If using a virtual address, this field will
+ be used as as symbol table address bits [59:56].
+ [55:4] Symbol table address bits [55:4]. Address type is determined
+ by CCB header.
+ [3:0] Symbol table version
+ Value Description
+ 0 Huffman encoding. Must use 64 byte aligned table
+ address. (Only available when using version 0 CCBs)
+ 1 OZIP encoding. Must use 16 byte aligned table
+ address. (Only available when using version 1 CCBs)
+
+
+36.2.1.3. Scan commands
+
+ The scan commands search a stream of input data elements for values which match the selection criteria.
+ All the input format types are supported. There are multiple formats for the scan commands, allowing the
+ scan to search for exact matches to one value, exact matches to either of two values, or any value within
+ a specified range. The specific type of scan is indicated by the command code in the CCB header. For the
+ scan range commands, the boundary conditions can be specified as greater-than-or-equal-to a value, less-
+ than-or-equal-to a value, or both by using two boundary values.
+
+ There are two supported formats for the output stream: the bit vector and index array formats (codes 0x8,
+ 0xD, and 0xE). For the standard scan command using the bit vector output, for each input element there
+ exists one bit in the vector that is set if the input element matched the scan criteria, or clear if not. The
+ inverted scan command inverts the polarity of the bits in the output. The most significant bit of the first
+ byte of the output stream corresponds to the first element in the input stream. The standard index array
+ output format contains one array entry for each input element that matched the scan criteria. Each array
+
+
+
+ 519
+ Coprocessor services
+
+
+entry is the index of an input element that matched the scan criteria. An inverted scan command produces
+a similar array, but of all the input elements which did NOT match the scan criteria.
+
+The return value of the CCB completion area contains the number of input elements found which match
+the scan criteria (or number that did not match for the inverted scans). The “number of elements processed”
+field in the CCB completion area will be valid, indicating the number of input elements processed.
+
+These commands are 128-byte “long format” CCBs.
+
+The scan CCB command format can be specified by the following packed C structure for a big-endian
+machine:
+
+
+ struct scan_ccb {
+ uint32_t header;
+ uint32_t control;
+ uint64_t completion;
+ uint64_t primary_input;
+ uint64_t data_access_control;
+ uint64_t secondary_input;
+ uint64_t match_criteria0;
+ uint64_t output;
+ uint64_t table;
+ uint64_t match_criteria1;
+ uint64_t match_criteria2;
+ uint64_t match_criteria3;
+ uint64_t reserved[5];
+ };
+
+
+The exact field offsets, sizes, and composition are as follows:
+
+Offset Size Field Description
+0 4 CCB header (Table 36.1, “CCB Header Format”)
+4 4 Command control
+ Bits Field Description
+ [31:28] Primary Input Format (see Section 36.2.1.1.1, “Primary Input
+ Format”)
+ [27:23] Primary Input Element Size (see Section 36.2.1.1.2, “Primary
+ Input Element Size”)
+ [22:20] Primary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [19] Secondary Input Format (see Section 36.2.1.1.3, “Secondary
+ Input Format”)
+ [18:16] Secondary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [15:14] Secondary Input Element Size (see Section 36.2.1.1.4,
+ “Secondary Input Element Size”
+ [13:10] Output Format (see Section 36.2.1.1.6, “Output Format”)
+ [9:5] Operand size for first scan criteria value. In a scan value
+ operation, this is one of two potential extact match values.
+ In a scan range operation, this is the size of the upper range
+
+
+ 520
+ Coprocessor services
+
+
+Offset Size Field Description
+ Bits Field Description
+ boundary. The value of this field is the number of bytes in the
+ operand, minus 1. Values 0xF-0x1E are reserved. A value of
+ 0x1F indicates this operand is not in use for this scan operation.
+ [4:0] Operand size for second scan criteria value. In a scan value
+ operation, this is one of two potential extact match values.
+ In a scan range operation, this is the size of the lower range
+ boundary. The value of this field is the number of bytes in the
+ operand, minus 1. Values 0xF-0x1E are reserved. A value of
+ 0x1F indicates this operand is not in use for this scan operation.
+8 8 Completion (same fields as Section 36.2.1.2, “Extract command”)
+16 8 Primary Input (same fields as Section 36.2.1.2, “Extract command”)
+24 8 Data Access Control (same fields as Section 36.2.1.2, “Extract command”)
+32 8 Secondary Input, if used by Primary Input Format. Same fields as Primary
+ Input.
+40 4 Most significant 4 bytes of first scan criteria operand. If first operand is less
+ than 4 bytes, the value is left-aligned to the lowest address bytes.
+44 4 Most significant 4 bytes of second scan criteria operand. If second operand
+ is less than 4 bytes, the value is left-aligned to the lowest address bytes.
+48 8 Output (same fields as Primary Input)
+56 8 Symbol Table (if used by Primary Input). Same fields as Section 36.2.1.2,
+ “Extract command”
+64 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+ bytes specified at offset 40, if needed by the operand size. If first operand
+ is less than 8 bytes, the valid bytes are left-aligned to the lowest address.
+68 4 Next 4 most significant bytes of second scan criteria operand occuring after
+ the bytes specified at offset 44, if needed by the operand size. If second
+ operand is less than 8 bytes, the valid bytes are left-aligned to the lowest
+ address.
+72 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+ bytes specified at offset 64, if needed by the operand size. If first operand
+ is less than 12 bytes, the valid bytes are left-aligned to the lowest address.
+76 4 Next 4 most significant bytes of second scan criteria operand occuring after
+ the bytes specified at offset 68, if needed by the operand size. If second
+ operand is less than 12 bytes, the valid bytes are left-aligned to the lowest
+ address.
+80 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+ bytes specified at offset 72, if needed by the operand size. If first operand
+ is less than 16 bytes, the valid bytes are left-aligned to the lowest address.
+84 4 Next 4 most significant bytes of second scan criteria operand occuring after
+ the bytes specified at offset 76, if needed by the operand size. If second
+ operand is less than 16 bytes, the valid bytes are left-aligned to the lowest
+ address.
+
+
+
+
+ 521
+ Coprocessor services
+
+
+36.2.1.4. Translate commands
+
+ The translate commands takes an input array of indicies, and a table of single bit values indexed by those
+ indicies, and outputs a bit vector or index array created by reading the tables bit value at each index in
+ the input array. The output should therefore contain exactly one bit per index in the input data stream,
+ when outputing as a bit vector. When outputing as an index array, the number of elements depends on the
+ values read in the bit table, but will always be less than, or equal to, the number of input elements. Only
+ a restricted subset of the possible input format types are supported. No variable width or Huffman/OZIP
+ encoded input streams are allowed. The primary input data element size must be 3 bytes or less.
+
+ The maximum table index size allowed is 15 bits, however, larger input elements may be used to provide
+ additional processing of the output values. If 2 or 3 byte values are used, the least significant 15 bits are
+ used as an index into the bit table. The most significant 9 bits (when using 3-byte input elements) or single
+ bit (when using 2-byte input elements) are compared against a fixed 9-bit test value provided in the CCB.
+ If the values match, the value from the bit table is used as the output element value. If the values do not
+ match, the output data element value is forced to 0.
+
+ In the inverted translate operation, the bit value read from bit table is inverted prior to its use. The additional
+ additional processing based on any additional non-index bits remains unchanged, and still forces the output
+ element value to 0 on a mismatch. The specific type of translate command is indicated by the command
+ code in the CCB header.
+
+ There are two supported formats for the output stream: the bit vector and index array formats (codes 0x8,
+ 0xD, and 0xE). The index array format is an array of indicies of bits which would have been set if the
+ output format was a bit array.
+
+ The return value of the CCB completion area contains the number of bits set in the output bit vector,
+ or number of elements in the output index array. The “number of elements processed” field in the CCB
+ completion area will be valid, indicating the number of input elements processed.
+
+ These commands are 64-byte “short format” CCBs.
+
+ The translate CCB command format can be specified by the following packed C structure for a big-endian
+ machine:
+
+
+ struct translate_ccb {
+ uint32_t header;
+ uint32_t control;
+ uint64_t completion;
+ uint64_t primary_input;
+ uint64_t data_access_control;
+ uint64_t secondary_input;
+ uint64_t reserved;
+ uint64_t output;
+ uint64_t table;
+ };
+
+
+ The exact field offsets, sizes, and composition are as follows:
+
+
+ Offset Size Field Description
+ 0 4 CCB header (Table 36.1, “CCB Header Format”)
+
+
+ 522
+ Coprocessor services
+
+
+Offset Size Field Description
+4 4 Command control
+ Bits Field Description
+ [31:28] Primary Input Format (see Section 36.2.1.1.1, “Primary Input
+ Format”)
+ [27:23] Primary Input Element Size (see Section 36.2.1.1.2, “Primary
+ Input Element Size”)
+ [22:20] Primary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [19] Secondary Input Format (see Section 36.2.1.1.3, “Secondary
+ Input Format”)
+ [18:16] Secondary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [15:14] Secondary Input Element Size (see Section 36.2.1.1.4,
+ “Secondary Input Element Size”
+ [13:10] Output Format (see Section 36.2.1.1.6, “Output Format”)
+ [9] Reserved
+ [8:0] Test value used for comparison against the most significant bits
+ in the input values, when using 2 or 3 byte input elements.
+8 8 Completion (same fields as Section 36.2.1.2, “Extract command”
+16 8 Primary Input (same fields as Section 36.2.1.2, “Extract command”
+24 8 Data Access Control (same fields as Section 36.2.1.2, “Extract command”,
+ except Primary Input Length Format may not use the 0x0 value)
+32 8 Secondary Input, if used by Primary Input Format. Same fields as Primary
+ Input.
+40 8 Reserved
+48 8 Output (same fields as Primary Input)
+56 8 Bit Table
+ Bits Field Description
+ [63:60] ADI version (see Section 36.2.1.1.7, “Application Data
+ Integrity (ADI)”)
+ [59:56] If using real address, these bits should be filled in with the
+ page size code for the page boundary checking the guest wants
+ the virtual machine to use when accessing this data stream
+ (checking is only guaranteed to be performed when using API
+ version 1.1 and later). If using a virtual address, this field will
+ be used as as bit table address bits [59:56]
+ [55:4] Bit table address bits [55:4]. Address type is determined by
+ CCB header. Address must be 64-byte aligned (CCB version
+ 0) or 16-byte aligned (CCB version 1).
+ [3:0] Bit table version
+ Value Description
+ 0 4KB table size
+ 1 8KB table size
+
+
+
+ 523
+ Coprocessor services
+
+
+36.2.1.5. Select command
+ The select command filters the primary input data stream by using a secondary input bit vector to determine
+ which input elements to include in the output. For each bit set at a given index N within the bit vector,
+ the Nth input element is included in the output. If the bit is not set, the element is not included. Only a
+ restricted subset of the possible input format types are supported. No variable width or run length encoded
+ input streams are allowed, since the secondary input stream is used for the filtering bit vector.
+
+ The only supported output format is a padded, byte-aligned output stream. The stream follows the same
+ rules and restrictions as padded output stream described in Section 36.2.1.2, “Extract command”.
+
+ The return value of the CCB completion area contains the number of bits set in the input bit vector. The
+ "number of elements processed" field in the CCB completion area will be valid, indicating the number
+ of input elements processed.
+
+ The select CCB is a 64-byte “short format” CCB.
+
+ The select CCB command format can be specified by the following packed C structure for a big-endian
+ machine:
+
+
+ struct select_ccb {
+ uint32_t header;
+ uint32_t control;
+ uint64_t completion;
+ uint64_t primary_input;
+ uint64_t data_access_control;
+ uint64_t secondary_input;
+ uint64_t reserved;
+ uint64_t output;
+ uint64_t table;
+ };
+
+
+ The exact field offsets, sizes, and composition are as follows:
+
+ Offset Size Field Description
+ 0 4 CCB header (Table 36.1, “CCB Header Format”)
+ 4 4 Command control
+ Bits Field Description
+ [31:28] Primary Input Format (see Section 36.2.1.1.1, “Primary Input
+ Format”)
+ [27:23] Primary Input Element Size (see Section 36.2.1.1.2, “Primary
+ Input Element Size”)
+ [22:20] Primary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [19] Secondary Input Format (see Section 36.2.1.1.3, “Secondary
+ Input Format”)
+ [18:16] Secondary Input Starting Offset (see Section 36.2.1.1.5, “Input
+ Element Offsets”)
+ [15:14] Secondary Input Element Size (see Section 36.2.1.1.4,
+ “Secondary Input Element Size”
+
+
+ 524
+ Coprocessor services
+
+
+ Offset Size Field Description
+ Bits Field Description
+ [13:10] Output Format (see Section 36.2.1.1.6, “Output Format”)
+ [9] Padding Direction selector: A value of 1 causes padding bytes
+ to be added to the left side of output elements. A value of 0
+ causes padding bytes to be added to the right side of output
+ elements.
+ [8:0] Reserved
+ 8 8 Completion (same fields as Section 36.2.1.2, “Extract command”
+ 16 8 Primary Input (same fields as Section 36.2.1.2, “Extract command”
+ 24 8 Data Access Control (same fields as Section 36.2.1.2, “Extract command”)
+ 32 8 Secondary Bit Vector Input. Same fields as Primary Input.
+ 40 8 Reserved
+ 48 8 Output (same fields as Primary Input)
+ 56 8 Symbol Table (if used by Primary Input). Same fields as Section 36.2.1.2,
+ “Extract command”
+
+36.2.1.6. No-op and Sync commands
+ The no-op (no operation) command is a CCB which has no processing effect. The CCB, when processed
+ by the virtual machine, simply updates the completion area with its execution status. The CCB may have
+ the serial-conditional flags set in order to restrict when it executes.
+
+ The sync command is a variant of the no-op command which with restricted execution timing. A sync
+ command CCB will only execute when all previous commands submitted in the same request have
+ completed. This is stronger than the conditional flag sequencing, which is only dependent on a single
+ previous serial CCB. While the relative ordering is guaranteed, virtual machine implementations with
+ shared hardware resources may cause the sync command to wait for longer than the minimum required
+ time.
+
+ The return value of the CCB completion area is invalid for these CCBs. The “number of elements
+ processed” field is also invalid for these CCBs.
+
+ These commands are 64-byte “short format” CCBs.
+
+ The no-op CCB command format can be specified by the following packed C structure for a big-endian
+ machine:
+
+
+ struct nop_ccb {
+ uint32_t header;
+ uint32_t control;
+ uint64_t completion;
+ uint64_t reserved[6];
+ };
+
+
+ The exact field offsets, sizes, and composition are as follows:
+
+ Offset Size Field Description
+ 0 4 CCB header (Table 36.1, “CCB Header Format”)
+
+
+ 525
+ Coprocessor services
+
+
+ Offset Size Field Description
+ 4 4 Command control
+ Bits Field Description
+ [31] If set, this CCB functions as a Sync command. If clear, this
+ CCB functions as a No-op command.
+ [30:0] Reserved
+ 8 8 Completion (same fields as Section 36.2.1.2, “Extract command”
+ 16 46 Reserved
+
+36.2.2. CCB Completion Area
+ All CCB commands use a common 128-byte Completion Area format, which can be specified by the
+ following packed C structure for a big-endian machine:
+
+
+ struct completion_area {
+ uint8_t status_flag;
+ uint8_t error_note;
+ uint8_t rsvd0[2];
+ uint32_t error_values;
+ uint32_t output_size;
+ uint32_t rsvd1;
+ uint64_t run_time;
+ uint64_t run_stats;
+ uint32_t elements;
+ uint8_t rsvd2[20];
+ uint64_t return_value;
+ uint64_t extra_return_value[8];
+ };
+
+
+ The Completion Area must be a 128-byte aligned memory location. The exact layout can be described
+ using byte offsets and sizes relative to the memory base:
+
+ Offset Size Field Description
+ 0 1 CCB execution status
+ 0x0 Command not yet completed
+ 0x1 Command ran and succeeded
+ 0x2 Command ran and failed (partial results may be been
+ produced)
+ 0x3 Command ran and was killed (partial execution may
+ have occurred)
+ 0x4 Command was not run
+ 0x5-0xF Reserved
+ 1 1 Error reason code
+ 0x0 Reserved
+ 0x1 Buffer overflow
+
+
+ 526
+ Coprocessor services
+
+
+Offset Size Field Description
+ 0x2 CCB decoding error
+ 0x3 Page overflow
+ 0x4-0x6 Reserved
+ 0x7 Command was killed
+ 0x8 Command execution timeout
+ 0x9 ADI miscompare error
+ 0xA Data format error
+ 0xB-0xD Reserved
+ 0xE Unexpected hardware error (Do not retry)
+ 0xF Unexpected hardware error (Retry is ok)
+ 0x10-0x7F Reserved
+ 0x80 Partial Symbol Warning
+ 0x81-0xFF Reserved
+2 2 Reserved
+4 4 If a partial symbol warning was generated, this field contains the number
+ of remaining bits which were not decoded.
+8 4 Number of bytes of output produced
+12 4 Reserved
+16 8 Runtime of command (unspecified time units)
+24 8 Reserved
+32 4 Number of elements processed
+36 20 Reserved
+56 8 Return value
+64 64 Extended return value
+
+The CCB completion area should be treated as read-only by guest software. The CCB execution status
+byte will be cleared by the Hypervisor to reflect the pending execution status when the CCB is submitted
+successfully. All other fields are considered invalid upon CCB submission until the CCB execution status
+byte becomes non-zero.
+
+CCBs which complete with status 0x2 or 0x3 may produce partial results and/or side effects due to partial
+execution of the CCB command. Some valid data may be accessible depending on the fault type, however,
+it is recommended that guest software treat the destination buffer as being in an unknown state. If a CCB
+completes with a status byte of 0x2, the error reason code byte can be read to determine what corrective
+action should be taken.
+
+A buffer overflow indicates that the results of the operation exceeded the size of the output buffer indicated
+in the CCB. The operation can be retried by resubmitting the CCB with a larger output buffer.
+
+A CCB decoding error indicates that the CCB contained some invalid field values. It may be also be
+triggered if the CCB output is directed at a non-existent secondary input and the pipelining hint is followed.
+
+A page overflow error indicates that the operation required accessing a memory location beyond the page
+size associated with a given address. No data will have been read or written past the page boundary, but
+partial results may have been written to the destination buffer. The CCB can be resubmitted with a larger
+page size memory allocation to complete the operation.
+
+
+ 527
+ Coprocessor services
+
+
+ In the case of pipelined CCBs, a page overflow error will be triggered if the output from the pipeline source
+ CCB ends before the input of the pipeline target CCB. Page boundaries are ignored when the pipeline
+ hint is followed.
+
+ Command kill indicates that the CCB execution was halted or prevented by use of the ccb_kill API call.
+
+ Command timeout indicates that the CCB execution began, but did not complete within a pre-determined
+ limit set by the virtual machine. The command may have produced some or no output. The CCB may be
+ resubmitted with no alterations.
+
+ ADI miscompare indicates that the memory buffer version specified in the CCB did not match the value
+ in memory when accessed by the virtual machine. Guest software should not attempt to resubmit the CCB
+ without determining the cause of the version mismatch.
+
+ A data format error indicates that the input data stream did not follow the specified data input formatting
+ selected in the CCB.
+
+ Some CCBs which encounter hardware errors may be resubmitted without change. Persistent hardware
+ errors may result in multiple failures until RAS software can identify and isolate the faulty component.
+
+ The output size field indicates the number of bytes of valid output in the destination buffer. This field is
+ not valid for all possible CCB commands.
+
+ The runtime field indicates the execution time of the CCB command once it leaves the internal virtual
+ machine queue. The time units are fixed, but unspecified, allowing only relative timing comparisons
+ by guest software. The time units may also vary by hardware platform, and should not be construed to
+ represent any absolute time value.
+
+ Some data query commands process data in units of elements. If applicable to the command, the number of
+ elements processed is indicated in the listed field. This field is not valid for all possible CCB commands.
+
+ The return value and extended return value fields are output locations for commands which do not use
+ a destination output buffer, or have secondary return results. The field is not valid for all possible CCB
+ commands.
+
+36.3. Hypervisor API Functions
+36.3.1. ccb_submit
+ trap# FAST_TRAP
+ function# CCB_SUBMIT
+ arg0 address
+ arg1 length
+ arg2 flags
+ arg3 reserved
+ ret0 status
+ ret1 length
+ ret2 status data
+ ret3 reserved
+
+ Submit one or more coprocessor control blocks (CCBs) for evaluation and processing by the virtual
+ machine. The CCBs are passed in a linear array indicated by address. length indicates the size of
+ the array in bytes.
+
+
+ 528
+ Coprocessor services
+
+
+The address should be aligned to the size indicated by length, rounded up to the nearest power of
+two. Virtual machines implementations may reject submissions which do not adhere to that alignment.
+length must be a multiple of 64 bytes. If length is zero, the maximum supported array length will be
+returned as length in ret1. In all other cases, the length value in ret1 will reflect the number of bytes
+successfully consumed from the input CCB array.
+
+ Implementation note
+ Virtual machines should never reject submissions based on the alignment of address if the
+ entire array is contained within a single memory page of the smallest page size supported by the
+ virtual machine.
+
+A guest may choose to submit addresses used in this API function, including the CCB array address,
+as either a real or virtual addresses, with the type of each address indicated in flags. Virtual addresses
+must be present in either the TLB or an active TSB to be processed. The translation context for virtual
+addresses is determined by a combination of CCB contents and the flags argument.
+
+The flags argument is divided into multiple fields defined as follows:
+
+
+Bits Field Description
+[63:16] Reserved
+[15] Disable ADI for VA reads (in API 2.0)
+ Reserved (in API 1.0)
+[14] Virtual addresses within CCBs are translated in privileged context
+[13:12] Alternate translation context for virtual addresses within CCBs:
+ 0b'00 CCBs requesting alternate context are rejected
+ 0b'01 Reserved
+ 0b'10 CCBs requesting alternate context use secondary context
+ 0b'11 CCBs requesting alternate context use nucleus context
+[11:9] Reserved
+[8] Queue info flag
+[7] All-or-nothing flag
+[6] If address is a virtual address, treat its translation context as privileged
+[5:4] Address type of address:
+ 0b'00 Real address
+ 0b'01 Virtual address in primary context
+ 0b'10 Virtual address in secondary context
+ 0b'11 Virtual address in nucleus context
+[3:2] Reserved
+[1:0] CCB command type:
+ 0b'00 Reserved
+ 0b'01 Reserved
+ 0b'10 Query command
+ 0b'11 Reserved
+
+
+
+ 529
+ Coprocessor services
+
+
+ The CCB submission type and address type for the CCB array must be provided in the flags argument.
+ All other fields are optional values which change the default behavior of the CCB processing.
+
+ When set to one, the "Disable ADI for VA reads" bit will turn off ADI checking when using a virtual
+ address to load data. ADI checking will still be done when loading real-addressed memory. This bit is only
+ available when using major version 2 of the coprocessor API group; at major version 1 it is reserved. For
+ more information about using ADI and DAX, see Section 36.2.1.1.7, “Application Data Integrity (ADI)”.
+
+ By default, all virtual addresses are treated as user addresses. If the virtual address translations are
+ privileged, they must be marked as such in the appropriate flags field. The virtual addresses used within
+ the submitted CCBs must all be translated with the same privilege level.
+
+ By default, all virtual addresses used within the submitted CCBs are translated using the primary context
+ active at the time of the submission. The address type field within a CCB allows each address to request
+ translation in an alternate address context. The address context used when the alternate address context is
+ requested is selected in the flags argument.
+
+ The all-or-nothing flag specifies whether the virtual machine should allow partial submissions of the
+ input CCB array. When using CCBs with serial-conditional flags, it is strongly recommended to use
+ the all-or-nothing flag to avoid broken conditional chains. Using long CCB chains on a machine under
+ high coprocessor load may make this impractical, however, and require submitting without the flag.
+ When submitting serial-conditional CCBs without the all-or-nothing flag, guest software must manually
+ implement the serial-conditional behavior at any point where the chain was not submitted in a single API
+ call, and resubmission of the remaining CCBs should clear any conditional flag that might be set in the
+ first remaining CCB. Failure to do so will produce indeterminate CCB execution status and ordering.
+
+ When the all-or-nothing flag is not specified, callers should check the value of length in ret1 to determine
+ how many CCBs from the array were successfully submitted. Any remaining CCBs can be resubmitted
+ without modifications.
+
+ The value of length in ret1 is also valid when the API call returns an error, and callers should always
+ check its value to determine which CCBs in the array were already processed. This will additionally
+ identify which CCB encountered the processing error, and was not submitted successfully.
+
+ If the queue info flag is used during submission, and at least one CCB was successfully submitted, the
+ length value in ret1 will be a multi-field value defined as follows:
+ Bits Field Description
+ [63:48] DAX unit instance identifier
+ [47:32] DAX queue instance identifier
+ [31:16] Reserved
+ [15:0] Number of CCB bytes successfully submitted
+
+ The value of status data depends on the status value. See error status code descriptions for details.
+ The value is undefined for status values that do not specifically list a value for the status data.
+
+ The API has a reserved input and output register which will be used in subsequent minor versions of this
+ API function. Guest software implementations should treat that register as voltile across the function call
+ in order to maintain forward compatibility.
+
+36.3.1.1. Errors
+ EOK One or more CCBs have been accepted and enqueued in the virtual machine
+ and no errors were been encountered during submission. Some submitted
+ CCBs may not have been enqueued due to internal virtual machine limitations,
+ and may be resubmitted without changes.
+
+
+ 530
+ Coprocessor services
+
+
+EWOULDBLOCK An internal resource conflict within the virtual machine has prevented it from
+ being able to complete the CCB submissions sufficiently quickly, requiring
+ it to abandon processing before it was complete. Some CCBs may have been
+ successfully enqueued prior to the block, and all remaining CCBs may be
+ resubmitted without changes.
+EBADALIGN CCB array is not on a 64-byte boundary, or the array length is not a multiple
+ of 64 bytes.
+ENORADDR A real address used either for the CCB array, or within one of the submitted
+ CCBs, is not valid for the guest. Some CCBs may have been enqueued prior
+ to the error being detected.
+ENOMAP A virtual address used either for the CCB array, or within one of the submitted
+ CCBs, could not be translated by the virtual machine using either the TLB
+ or TSB contents. The submission may be retried after adding the required
+ mapping, or by converting the virtual address into a real address. Due to the
+ shared nature of address translation resources, there is no theoretical limit on
+ the number of times the translation may fail, and it is recommended all guests
+ implement some real address based backup. The virtual address which failed
+ translation is returned as status data in ret2. Some CCBs may have been
+ enqueued prior to the error being detected.
+EINVAL The virtual machine detected an invalid CCB during submission, or invalid
+ input arguments, such as bad flag values. Note that not all invalid CCB values
+ will be detected during submission, and some may be reported as errors in the
+ completion area instead. Some CCBs may have been enqueued prior to the
+ error being detected. This error may be returned if the CCB version is invalid.
+ETOOMANY The request was submitted with the all-or-nothing flag set, and the array size is
+ greater than the virtual machine can support in a single request. The maximum
+ supported size for the current virtual machine can be queried by submitting a
+ request with a zero length array, as described above.
+ENOACCESS The guest does not have permission to submit CCBs, or an address used in a
+ CCBs lacks sufficient permissions to perform the required operation (no write
+ permission on the destination buffer address, for example). A virtual address
+ which fails permission checking is returned as status data in ret2. Some
+ CCBs may have been enqueued prior to the error being detected.
+EUNAVAILABLE The requested CCB operation could not be performed at this time. The
+ restricted operation availability may apply only to the first unsuccessfully
+ submitted CCB, or may apply to a larger scope. The status should not be
+ interpreted as permanent, and the guest should attempt to submit CCBs in
+ the future which had previously been unable to be performed. The status
+ data provides additional information about scope of the retricted availability
+ as follows:
+ Value Description
+ 0 Processing for the exact CCB instance submitted was unavailable,
+ and it is recommended the guest emulate the operation. The
+ guest should continue to submit all other CCBs, and assume no
+ restrictions beyond this exact CCB instance.
+ 1 Processing is unavailable for all CCBs using the requested opcode,
+ and it is recommended the guest emulate the operation. The
+ guest should continue to submit all other CCBs that use different
+ opcodes, but can expect continued rejections of CCBs using the
+ same opcode in the near future.
+
+
+ 531
+ Coprocessor services
+
+
+ Value Description
+ 2 Processing is unavailable for all CCBs using the requested CCB
+ version, and it is recommended the guest emulate the operation.
+ The guest should continue to submit all other CCBs that use
+ different CCB versions, but can expect continued rejections of
+ CCBs using the same CCB version in the near future.
+ 3 Processing is unavailable for all CCBs on the submitting vcpu,
+ and it is recommended the guest emulate the operation or resubmit
+ the CCB on a different vcpu. The guest should continue to submit
+ CCBs on all other vcpus but can expect continued rejections of all
+ CCBs on this vcpu in the near future.
+ 4 Processing is unavailable for all CCBs, and it is recommended
+ the guest emulate the operation. The guest should expect all CCB
+ submissions to be similarly rejected in the near future.
+
+
+36.3.2. ccb_info
+
+ trap# FAST_TRAP
+ function# CCB_INFO
+ arg0 address
+ ret0 status
+ ret1 CCB state
+ ret2 position
+ ret3 dax
+ ret4 queue
+
+ Requests status information on a previously submitted CCB. The previously submitted CCB is identified
+ by the 64-byte aligned real address of the CCBs completion area.
+
+ A CCB can be in one of 4 states:
+
+
+ State Value Description
+ COMPLETED 0 The CCB has been fetched and executed, and is no longer active in
+ the virtual machine.
+ ENQUEUED 1 The requested CCB is current in a queue awaiting execution.
+ INPROGRESS 2 The CCB has been fetched and is currently being executed. It may still
+ be possible to stop the execution using the ccb_kill hypercall.
+ NOTFOUND 3 The CCB could not be located in the virtual machine, and does not
+ appear to have been executed. This may occur if the CCB was lost
+ due to a hardware error, or the CCB may not have been successfully
+ submitted to the virtual machine in the first place.
+
+ Implementation note
+ Some platforms may not be able to report CCBs that are currently being processed, and therefore
+ guest software should invoke the ccb_kill hypercall prior to assuming the request CCB will never
+ be executed because it was in the NOTFOUND state.
+
+
+ 532
+ Coprocessor services
+
+
+ The position return value is only valid when the state is ENQUEUED. The value returned is the number
+ of other CCBs ahead of the requested CCB, to provide a relative estimate of when the CCB may execute.
+
+ The dax return value is only valid when the state is ENQUEUED. The value returned is the DAX unit
+ instance indentifier for the DAX unit processing the queue where the requested CCB is located. The value
+ matches the value that would have been, or was, returned by ccb_submit using the queue info flag.
+
+ The queue return value is only valid when the state is ENQUEUED. The value returned is the DAX
+ queue instance indentifier for the DAX unit processing the queue where the requested CCB is located. The
+ value matches the value that would have been, or was, returned by ccb_submit using the queue info flag.
+
+36.3.2.1. Errors
+
+ EOK The request was proccessed and the CCB state is valid.
+ EBADALIGN address is not on a 64-byte aligned.
+ ENORADDR The real address provided for address is not valid.
+ EINVAL The CCB completion area contents are not valid.
+ EWOULDBLOCK Internal resource contraints prevented the CCB state from being queried at this
+ time. The guest should retry the request.
+ ENOACCESS The guest does not have permission to access the coprocessor virtual device
+ functionality.
+
+36.3.3. ccb_kill
+
+ trap# FAST_TRAP
+ function# CCB_KILL
+ arg0 address
+ ret0 status
+ ret1 result
+
+ Request to stop execution of a previously submitted CCB. The previously submitted CCB is identified by
+ the 64-byte aligned real address of the CCBs completion area.
+
+ The kill attempt can produce one of several values in the result return value, reflecting the CCB state
+ and actions taken by the Hypervisor:
+
+ Result Value Description
+ COMPLETED 0 The CCB has been fetched and executed, and is no longer active in
+ the virtual machine. It could not be killed and no action was taken.
+ DEQUEUED 1 The requested CCB was still enqueued when the kill request was
+ submitted, and has been removed from the queue. Since the CCB
+ never began execution, no memory modifications were produced by
+ it, and the completion area will never be updated. The same CCB may
+ be submitted again, if desired, with no modifications required.
+ KILLED 2 The CCB had been fetched and was being executed when the kill
+ request was submitted. The CCB execution was stopped, and the CCB
+ is no longer active in the virtual machine. The CCB completion area
+ will reflect the killed status, with the subsequent implications that
+ partial results may have been produced. Partial results may include full
+
+
+ 533
+ Coprocessor services
+
+
+ Result Value Description
+ command execution if the command was stopped just prior to writing
+ to the completion area.
+ NOTFOUND 3 The CCB could not be located in the virtual machine, and does not
+ appear to have been executed. This may occur if the CCB was lost
+ due to a hardware error, or the CCB may not have been successfully
+ submitted to the virtual machine in the first place. CCBs in the state
+ are guaranteed to never execute in the future unless resubmitted.
+
+36.3.3.1. Interactions with Pipelined CCBs
+
+ If the pipeline target CCB is killed but the pipeline source CCB was skipped, the completion area of the
+ target CCB may contain status (4,0) "Command was skipped" instead of (3,7) "Command was killed".
+
+ If the pipeline source CCB is killed, the pipeline target CCB's completion status may read (1,0) "Success".
+ This does not mean the target CCB was processed; since the source CCB was killed, there was no
+ meaningful output on which the target CCB could operate.
+
+36.3.3.2. Errors
+
+ EOK The request was proccessed and the result is valid.
+ EBADALIGN address is not on a 64-byte aligned.
+ ENORADDR The real address provided for address is not valid.
+ EINVAL The CCB completion area contents are not valid.
+ EWOULDBLOCK Internal resource contraints prevented the CCB from being killed at this time.
+ The guest should retry the request.
+ ENOACCESS The guest does not have permission to access the coprocessor virtual device
+ functionality.
+
+36.3.4. dax_info
+ trap# FAST_TRAP
+ function# DAX_INFO
+ ret0 status
+ ret1 Number of enabled DAX units
+ ret2 Number of disabled DAX units
+
+ Returns the number of DAX units that are enabled for the calling guest to submit CCBs. The number of
+ DAX units that are disabled for the calling guest are also returned. A disabled DAX unit would have been
+ available for CCB submission to the calling guest had it not been offlined.
+
+36.3.4.1. Errors
+
+ EOK The request was proccessed and the number of enabled/disabled DAX units
+ are valid.
+
+
+
+
+ 534
+
diff --git a/Documentation/sparc/oradax/oracle-dax.txt b/Documentation/sparc/oradax/oracle-dax.txt
new file mode 100644
index 000000000000..9d53ac93286f
--- /dev/null
+++ b/Documentation/sparc/oradax/oracle-dax.txt
@@ -0,0 +1,429 @@
+Oracle Data Analytics Accelerator (DAX)
+---------------------------------------
+
+DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
+(DAX2) processor chips, and has direct access to the CPU's L3 caches
+as well as physical memory. It can perform several operations on data
+streams with various input and output formats. A driver provides a
+transport mechanism and has limited knowledge of the various opcodes
+and data formats. A user space library provides high level services
+and translates these into low level commands which are then passed
+into the driver and subsequently the Hypervisor and the coprocessor.
+The library is the recommended way for applications to use the
+coprocessor, and the driver interface is not intended for general use.
+This document describes the general flow of the driver, its
+structures, and its programmatic interface. It also provides example
+code sufficient to write user or kernel applications that use DAX
+functionality.
+
+The user library is open source and available at:
+ https://oss.oracle.com/git/gitweb.cgi?p=libdax.git
+
+The Hypervisor interface to the coprocessor is described in detail in
+the accompanying document, dax-hv-api.txt, which is a plain text
+excerpt of the (Oracle internal) "UltraSPARC Virtual Machine
+Specification" version 3.0.20+15, dated 2017-09-25.
+
+
+High Level Overview
+-------------------
+
+A coprocessor request is described by a Command Control Block
+(CCB). The CCB contains an opcode and various parameters. The opcode
+specifies what operation is to be done, and the parameters specify
+options, flags, sizes, and addresses. The CCB (or an array of CCBs)
+is passed to the Hypervisor, which handles queueing and scheduling of
+requests to the available coprocessor execution units. A status code
+returned indicates if the request was submitted successfully or if
+there was an error. One of the addresses given in each CCB is a
+pointer to a "completion area", which is a 128 byte memory block that
+is written by the coprocessor to provide execution status. No
+interrupt is generated upon completion; the completion area must be
+polled by software to find out when a transaction has finished, but
+the M7 and later processors provide a mechanism to pause the virtual
+processor until the completion status has been updated by the
+coprocessor. This is done using the monitored load and mwait
+instructions, which are described in more detail later. The DAX
+coprocessor was designed so that after a request is submitted, the
+kernel is no longer involved in the processing of it. The polling is
+done at the user level, which results in almost zero latency between
+completion of a request and resumption of execution of the requesting
+thread.
+
+
+Addressing Memory
+-----------------
+
+The kernel does not have access to physical memory in the Sun4v
+architecture, as there is an additional level of memory virtualization
+present. This intermediate level is called "real" memory, and the
+kernel treats this as if it were physical. The Hypervisor handles the
+translations between real memory and physical so that each logical
+domain (LDOM) can have a partition of physical memory that is isolated
+from that of other LDOMs. When the kernel sets up a virtual mapping,
+it specifies a virtual address and the real address to which it should
+be mapped.
+
+The DAX coprocessor can only operate on physical memory, so before a
+request can be fed to the coprocessor, all the addresses in a CCB must
+be converted into physical addresses. The kernel cannot do this since
+it has no visibility into physical addresses. So a CCB may contain
+either the virtual or real addresses of the buffers or a combination
+of them. An "address type" field is available for each address that
+may be given in the CCB. In all cases, the Hypervisor will translate
+all the addresses to physical before dispatching to hardware. Address
+translations are performed using the context of the process initiating
+the request.
+
+
+The Driver API
+--------------
+
+An application makes requests to the driver via the write() system
+call, and gets results (if any) via read(). The completion areas are
+made accessible via mmap(), and are read-only for the application.
+
+The request may either be an immediate command or an array of CCBs to
+be submitted to the hardware.
+
+Each open instance of the device is exclusive to the thread that
+opened it, and must be used by that thread for all subsequent
+operations. The driver open function creates a new context for the
+thread and initializes it for use. This context contains pointers and
+values used internally by the driver to keep track of submitted
+requests. The completion area buffer is also allocated, and this is
+large enough to contain the completion areas for many concurrent
+requests. When the device is closed, any outstanding transactions are
+flushed and the context is cleaned up.
+
+On a DAX1 system (M7), the device will be called "oradax1", while on a
+DAX2 system (M8) it will be "oradax2". If an application requires one
+or the other, it should simply attempt to open the appropriate
+device. Only one of the devices will exist on any given system, so the
+name can be used to determine what the platform supports.
+
+The immediate commands are CCB_DEQUEUE, CCB_KILL, and CCB_INFO. For
+all of these, success is indicated by a return value from write()
+equal to the number of bytes given in the call. Otherwise -1 is
+returned and errno is set.
+
+CCB_DEQUEUE
+
+Tells the driver to clean up resources associated with past
+requests. Since no interrupt is generated upon the completion of a
+request, the driver must be told when it may reclaim resources. No
+further status information is returned, so the user should not
+subsequently call read().
+
+CCB_KILL
+
+Kills a CCB during execution. The CCB is guaranteed to not continue
+executing once this call returns successfully. On success, read() must
+be called to retrieve the result of the action.
+
+CCB_INFO
+
+Retrieves information about a currently executing CCB. Note that some
+Hypervisors might return 'notfound' when the CCB is in 'inprogress'
+state. To ensure a CCB in the 'notfound' state will never be executed,
+CCB_KILL must be invoked on that CCB. Upon success, read() must be
+called to retrieve the details of the action.
+
+Submission of an array of CCBs for execution
+
+A write() whose length is a multiple of the CCB size is treated as a
+submit operation. The file offset is treated as the index of the
+completion area to use, and may be set via lseek() or using the
+pwrite() system call. If -1 is returned then errno is set to indicate
+the error. Otherwise, the return value is the length of the array that
+was actually accepted by the coprocessor. If the accepted length is
+equal to the requested length, then the submission was completely
+successful and there is no further status needed; hence, the user
+should not subsequently call read(). Partial acceptance of the CCB
+array is indicated by a return value less than the requested length,
+and read() must be called to retrieve further status information. The
+status will reflect the error caused by the first CCB that was not
+accepted, and status_data will provide additional data in some cases.
+
+MMAP
+
+The mmap() function provides access to the completion area allocated
+in the driver. Note that the completion area is not writeable by the
+user process, and the mmap call must not specify PROT_WRITE.
+
+
+Completion of a Request
+-----------------------
+
+The first byte in each completion area is the command status which is
+updated by the coprocessor hardware. Software may take advantage of
+new M7/M8 processor capabilities to efficiently poll this status byte.
+First, a "monitored load" is achieved via a Load from Alternate Space
+(ldxa, lduba, etc.) with ASI 0x84 (ASI_MONITOR_PRIMARY). Second, a
+"monitored wait" is achieved via the mwait instruction (a write to
+%asr28). This instruction is like pause in that it suspends execution
+of the virtual processor for the given number of nanoseconds, but in
+addition will terminate early when one of several events occur. If the
+block of data containing the monitored location is modified, then the
+mwait terminates. This causes software to resume execution immediately
+(without a context switch or kernel to user transition) after a
+transaction completes. Thus the latency between transaction completion
+and resumption of execution may be just a few nanoseconds.
+
+
+Application Life Cycle of a DAX Submission
+------------------------------------------
+
+ - open dax device
+ - call mmap() to get the completion area address
+ - allocate a CCB and fill in the opcode, flags, parameters, addresses, etc.
+ - submit CCB via write() or pwrite()
+ - go into a loop executing monitored load + monitored wait and
+ terminate when the command status indicates the request is complete
+ (CCB_KILL or CCB_INFO may be used any time as necessary)
+ - perform a CCB_DEQUEUE
+ - call munmap() for completion area
+ - close the dax device
+
+
+Memory Constraints
+------------------
+
+The DAX hardware operates only on physical addresses. Therefore, it is
+not aware of virtual memory mappings and the discontiguities that may
+exist in the physical memory that a virtual buffer maps to. There is
+no I/O TLB or any scatter/gather mechanism. All buffers, whether input
+or output, must reside in a physically contiguous region of memory.
+
+The Hypervisor translates all addresses within a CCB to physical
+before handing off the CCB to DAX. The Hypervisor determines the
+virtual page size for each virtual address given, and uses this to
+program a size limit for each address. This prevents the coprocessor
+from reading or writing beyond the bound of the virtual page, even
+though it is accessing physical memory directly. A simpler way of
+saying this is that a DAX operation will never "cross" a virtual page
+boundary. If an 8k virtual page is used, then the data is strictly
+limited to 8k. If a user's buffer is larger than 8k, then a larger
+page size must be used, or the transaction size will be truncated to
+8k.
+
+Huge pages. A user may allocate huge pages using standard interfaces.
+Memory buffers residing on huge pages may be used to achieve much
+larger DAX transaction sizes, but the rules must still be followed,
+and no transaction will cross a page boundary, even a huge page. A
+major caveat is that Linux on Sparc presents 8Mb as one of the huge
+page sizes. Sparc does not actually provide a 8Mb hardware page size,
+and this size is synthesized by pasting together two 4Mb pages. The
+reasons for this are historical, and it creates an issue because only
+half of this 8Mb page can actually be used for any given buffer in a
+DAX request, and it must be either the first half or the second half;
+it cannot be a 4Mb chunk in the middle, since that crosses a
+(hardware) page boundary. Note that this entire issue may be hidden by
+higher level libraries.
+
+
+CCB Structure
+-------------
+A CCB is an array of 8 64-bit words. Several of these words provide
+command opcodes, parameters, flags, etc., and the rest are addresses
+for the completion area, output buffer, and various inputs:
+
+ struct ccb {
+ u64 control;
+ u64 completion;
+ u64 input0;
+ u64 access;
+ u64 input1;
+ u64 op_data;
+ u64 output;
+ u64 table;
+ };
+
+See libdax/common/sys/dax1/dax1_ccb.h for a detailed description of
+each of these fields, and see dax-hv-api.txt for a complete description
+of the Hypervisor API available to the guest OS (ie, Linux kernel).
+
+The first word (control) is examined by the driver for the following:
+ - CCB version, which must be consistent with hardware version
+ - Opcode, which must be one of the documented allowable commands
+ - Address types, which must be set to "virtual" for all the addresses
+ given by the user, thereby ensuring that the application can
+ only access memory that it owns
+
+
+Example Code
+------------
+
+The DAX is accessible to both user and kernel code. The kernel code
+can make hypercalls directly while the user code must use wrappers
+provided by the driver. The setup of the CCB is nearly identical for
+both; the only difference is in preparation of the completion area. An
+example of user code is given now, with kernel code afterwards.
+
+In order to program using the driver API, the file
+arch/sparc/include/uapi/asm/oradax.h must be included.
+
+First, the proper device must be opened. For M7 it will be
+/dev/oradax1 and for M8 it will be /dev/oradax2. The simplest
+procedure is to attempt to open both, as only one will succeed:
+
+ fd = open("/dev/oradax1", O_RDWR);
+ if (fd < 0)
+ fd = open("/dev/oradax2", O_RDWR);
+ if (fd < 0)
+ /* No DAX found */
+
+Next, the completion area must be mapped:
+
+ completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0);
+
+All input and output buffers must be fully contained in one hardware
+page, since as explained above, the DAX is strictly constrained by
+virtual page boundaries. In addition, the output buffer must be
+64-byte aligned and its size must be a multiple of 64 bytes because
+the coprocessor writes in units of cache lines.
+
+This example demonstrates the DAX Scan command, which takes as input a
+vector and a match value, and produces a bitmap as the output. For
+each input element that matches the value, the corresponding bit is
+set in the output.
+
+In this example, the input vector consists of a series of single bits,
+and the match value is 0. So each 0 bit in the input will produce a 1
+in the output, and vice versa, which produces an output bitmap which
+is the input bitmap inverted.
+
+For details of all the parameters and bits used in this CCB, please
+refer to section 36.2.1.3 of the DAX Hypervisor API document, which
+describes the Scan command in detail.
+
+ ccb->control = /* Table 36.1, CCB Header Format */
+ (2L << 48) /* command = Scan Value */
+ | (3L << 40) /* output address type = primary virtual */
+ | (3L << 34) /* primary input address type = primary virtual */
+ /* Section 36.2.1, Query CCB Command Formats */
+ | (1 << 28) /* 36.2.1.1.1 primary input format = fixed width bit packed */
+ | (0 << 23) /* 36.2.1.1.2 primary input element size = 0 (1 bit) */
+ | (8 << 10) /* 36.2.1.1.6 output format = bit vector */
+ | (0 << 5) /* 36.2.1.3 First scan criteria size = 0 (1 byte) */
+ | (31 << 0); /* 36.2.1.3 Disable second scan criteria */
+
+ ccb->completion = 0; /* Completion area address, to be filled in by driver */
+
+ ccb->input0 = (unsigned long) input; /* primary input address */
+
+ ccb->access = /* Section 36.2.1.2, Data Access Control */
+ (2 << 24) /* Primary input length format = bits */
+ | (nbits - 1); /* number of bits in primary input stream, minus 1 */
+
+ ccb->input1 = 0; /* secondary input address, unused */
+
+ ccb->op_data = 0; /* scan criteria (value to be matched) */
+
+ ccb->output = (unsigned long) output; /* output address */
+
+ ccb->table = 0; /* table address, unused */
+
+The CCB submission is a write() or pwrite() system call to the
+driver. If the call fails, then a read() must be used to retrieve the
+status:
+
+ if (pwrite(fd, ccb, 64, 0) != 64) {
+ struct ccb_exec_result status;
+ read(fd, &status, sizeof(status));
+ /* bail out */
+ }
+
+After a successful submission of the CCB, the completion area may be
+polled to determine when the DAX is finished. Detailed information on
+the contents of the completion area can be found in section 36.2.2 of
+the DAX HV API document.
+
+ while (1) {
+ /* Monitored Load */
+ __asm__ __volatile__("lduba [%1] 0x84, %0\n"
+ : "=r" (status)
+ : "r" (completion_area));
+
+ if (status) /* 0 indicates command in progress */
+ break;
+
+ /* MWAIT */
+ __asm__ __volatile__("wr %%g0, 1000, %%asr28\n" ::); /* 1000 ns */
+ }
+
+A completion area status of 1 indicates successful completion of the
+CCB and validity of the output bitmap, which may be used immediately.
+All other non-zero values indicate error conditions which are
+described in section 36.2.2.
+
+ if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
+ /* completion_area[0] contains the completion status */
+ /* completion_area[1] contains an error code, see 36.2.2 */
+ }
+
+After the completion area has been processed, the driver must be
+notified that it can release any resources associated with the
+request. This is done via the dequeue operation:
+
+ struct dax_command cmd;
+ cmd.command = CCB_DEQUEUE;
+ if (write(fd, &cmd, sizeof(cmd)) != sizeof(cmd)) {
+ /* bail out */
+ }
+
+Finally, normal program cleanup should be done, i.e., unmapping
+completion area, closing the dax device, freeing memory etc.
+
+[Kernel example]
+
+The only difference in using the DAX in kernel code is the treatment
+of the completion area. Unlike user applications which mmap the
+completion area allocated by the driver, kernel code must allocate its
+own memory to use for the completion area, and this address and its
+type must be given in the CCB:
+
+ ccb->control |= /* Table 36.1, CCB Header Format */
+ (3L << 32); /* completion area address type = primary virtual */
+
+ ccb->completion = (unsigned long) completion_area; /* Completion area address */
+
+The dax submit hypercall is made directly. The flags used in the
+ccb_submit call are documented in the DAX HV API in section 36.3.1.
+
+#include <asm/hypervisor.h>
+
+ hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64,
+ HV_CCB_QUERY_CMD |
+ HV_CCB_ARG0_PRIVILEGED | HV_CCB_ARG0_TYPE_PRIMARY |
+ HV_CCB_VA_PRIVILEGED,
+ 0, &bytes_accepted, &status_data);
+
+ if (hv_rv != HV_EOK) {
+ /* hv_rv is an error code, status_data contains */
+ /* potential additional status, see 36.3.1.1 */
+ }
+
+After the submission, the completion area polling code is identical to
+that in user land:
+
+ while (1) {
+ /* Monitored Load */
+ __asm__ __volatile__("lduba [%1] 0x84, %0\n"
+ : "=r" (status)
+ : "r" (completion_area));
+
+ if (status) /* 0 indicates command in progress */
+ break;
+
+ /* MWAIT */
+ __asm__ __volatile__("wr %%g0, 1000, %%asr28\n" ::); /* 1000 ns */
+ }
+
+ if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
+ /* completion_area[0] contains the completion status */
+ /* completion_area[1] contains an error code, see 36.2.2 */
+ }
+
+The output bitmap is ready for consumption immediately after the
+completion status indicates success.
diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index cef4ad19624c..b97228d2cc0e 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -81,7 +81,7 @@ __version__ = '1.0.0'
# -------------
def which(cmd):
- """Searches the ``cmd`` in the ``PATH`` enviroment.
+ """Searches the ``cmd`` in the ``PATH`` environment.
This *which* searches the PATH for executable ``cmd`` . First match is
returned, if nothing is found, ``None` is returned.
@@ -419,7 +419,7 @@ def visit_kernel_render(self, node):
tmp_ext = RENDER_MARKUP_EXT.get(srclang, None)
if tmp_ext is None:
- app.warn('kernel-render: "%s" unknow / include raw.' % (srclang))
+ app.warn('kernel-render: "%s" unknown / include raw.' % (srclang))
return
if not dot_cmd and tmp_ext == '.dot':
@@ -482,7 +482,7 @@ class KernelRender(Figure):
srclang = self.arguments[0].strip()
if srclang not in RENDER_MARKUP_EXT.keys():
return [self.state_machine.reporter.warning(
- 'Unknow source language "%s", use one of: %s.' % (
+ 'Unknown source language "%s", use one of: %s.' % (
srclang, ",".join(RENDER_MARKUP_EXT.keys())),
line=self.lineno)]
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 694968c7523c..412314eebda6 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -34,6 +34,7 @@ show up in /proc/sys/kernel:
- hostname
- hotplug
- hardlockup_all_cpu_backtrace
+- hardlockup_panic
- hung_task_panic
- hung_task_check_count
- hung_task_timeout_secs
@@ -313,6 +314,19 @@ will be initiated.
1: on detection capture more debug information.
==============================================================
+hardlockup_panic:
+
+This parameter can be used to control whether the kernel panics
+when a hard lockup is detected.
+
+ 0 - don't panic on hard lockup
+ 1 - panic on hard lockup
+
+See Documentation/lockup-watchdogs.txt for more information. This can
+also be set using the nmi_watchdog kernel parameter.
+
+==============================================================
+
hotplug:
Path for the hotplug policy agent.
@@ -377,7 +391,8 @@ kptr_restrict:
This toggle indicates whether restrictions are placed on
exposing kernel addresses via /proc and other interfaces.
-When kptr_restrict is set to (0), the default, there are no restrictions.
+When kptr_restrict is set to 0 (the default) the address is hashed before
+printing. (This is the equivalent to %p.)
When kptr_restrict is set to (1), kernel pointers printed using the %pK
format specifier will be replaced with 0's unless the user has CAP_SYSLOG
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index b67044a2575f..35c62f522754 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -95,7 +95,9 @@ dev_weight
--------------
The maximum number of packets that kernel can handle on a NAPI interrupt,
-it's a Per-CPU variable.
+it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
+aggregated packet is counted as one packet in this context.
+
Default: 64
dev_weight_rx_bias
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 5025ff9307e6..ff234d229cbb 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -30,7 +30,6 @@ Currently, these files are in /proc/sys/vm:
- dirty_writeback_centisecs
- drop_caches
- extfrag_threshold
-- hugepages_treat_as_movable
- hugetlb_shm_group
- laptop_mode
- legacy_va_layout
@@ -261,30 +260,6 @@ any throttling.
==============================================================
-hugepages_treat_as_movable
-
-This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
-or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE.
-ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified,
-so this parameter has no effect if used without kernelcore=.
-
-Hugepage migration is now available in some situations which depend on the
-architecture and/or the hugepage size. If a hugepage supports migration,
-allocation from ZONE_MOVABLE is always enabled for the hugepage regardless
-of the value of this parameter.
-IOW, this parameter affects only non-migratable hugepages.
-
-Assuming that hugepages are not migratable in your system, one usecase of
-this parameter is that users can make hugepage pool more extensible by
-enabling the allocation from ZONE_MOVABLE. This is because on ZONE_MOVABLE
-page reclaim/migration/compaction work more and you can get contiguous
-memory more likely. Note that using ZONE_MOVABLE for non-migratable
-hugepages can do harm to other features like memory hotremove (because
-memory hotremove expects that memory blocks on ZONE_MOVABLE are always
-removable,) so it's a trade-off responsible for the users.
-
-==============================================================
-
hugetlb_shm_group
hugetlb_shm_group contains group id that is allowed to create SysV
diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt
index 71653584cd03..7df567eaea1a 100644
--- a/Documentation/thermal/cpu-cooling-api.txt
+++ b/Documentation/thermal/cpu-cooling-api.txt
@@ -26,39 +26,16 @@ the user. The registration APIs returns the cooling device pointer.
clip_cpus: cpumask of cpus where the frequency constraints will happen.
1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
- struct device_node *np, const struct cpumask *clip_cpus)
+ struct cpufreq_policy *policy)
This interface function registers the cpufreq cooling device with
the name "thermal-cpufreq-%x" linking it with a device tree node, in
order to bind it via the thermal DT code. This api can support multiple
instances of cpufreq cooling devices.
- np: pointer to the cooling device device tree node
- clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ policy: CPUFreq policy.
-1.1.3 struct thermal_cooling_device *cpufreq_power_cooling_register(
- const struct cpumask *clip_cpus, u32 capacitance,
- get_static_t plat_static_func)
-
-Similar to cpufreq_cooling_register, this function registers a cpufreq
-cooling device. Using this function, the cooling device will
-implement the power extensions by using a simple cpu power model. The
-cpus must have registered their OPPs using the OPP library.
-
-The additional parameters are needed for the power model (See 2. Power
-models). "capacitance" is the dynamic power coefficient (See 2.1
-Dynamic power). "plat_static_func" is a function to calculate the
-static power consumed by these cpus (See 2.2 Static power).
-
-1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register(
- struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance,
- get_static_t plat_static_func)
-
-Similar to cpufreq_power_cooling_register, this function register a
-cpufreq cooling device with power extensions using the device tree
-information supplied by the np parameter.
-
-1.1.5 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
This interface function unregisters the "thermal-cpufreq-%x" cooling device.
@@ -67,20 +44,14 @@ information supplied by the np parameter.
2. Power models
The power API registration functions provide a simple power model for
-CPUs. The current power is calculated as dynamic + (optionally)
-static power. This power model requires that the operating-points of
+CPUs. The current power is calculated as dynamic power (static power isn't
+supported currently). This power model requires that the operating-points of
the CPUs are registered using the kernel's opp library and the
`cpufreq_frequency_table` is assigned to the `struct device` of the
cpu. If you are using CONFIG_CPUFREQ_DT then the
`cpufreq_frequency_table` should already be assigned to the cpu
device.
-The `plat_static_func` parameter of `cpufreq_power_cooling_register()`
-and `of_cpufreq_power_cooling_register()` is optional. If you don't
-provide it, only dynamic power will be considered.
-
-2.1 Dynamic power
-
The dynamic power consumption of a processor depends on many factors.
For a given processor implementation the primary factors are:
@@ -119,79 +90,3 @@ mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range
from 100 to 500. For reference, the approximate values for the SoC in
ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
140 for the Cortex-A53 cluster.
-
-
-2.2 Static power
-
-Static leakage power consumption depends on a number of factors. For a
-given circuit implementation the primary factors are:
-
-- Time the circuit spends in each 'power state'
-- Temperature
-- Operating voltage
-- Process grade
-
-The time the circuit spends in each 'power state' for a given
-evaluation period at first order means OFF or ON. However,
-'retention' states can also be supported that reduce power during
-inactive periods without loss of context.
-
-Note: The visibility of state entries to the OS can vary, according to
-platform specifics, and this can then impact the accuracy of a model
-based on OS state information alone. It might be possible in some
-cases to extract more accurate information from system resources.
-
-The temperature, operating voltage and process 'grade' (slow to fast)
-of the circuit are all significant factors in static leakage power
-consumption. All of these have complex relationships to static power.
-
-Circuit implementation specific factors include the chosen silicon
-process as well as the type, number and size of transistors in both
-the logic gates and any RAM elements included.
-
-The static power consumption modelling must take into account the
-power managed regions that are implemented. Taking the example of an
-ARM processor cluster, the modelling would take into account whether
-each CPU can be powered OFF separately or if only a single power
-region is implemented for the complete cluster.
-
-In one view, there are others, a static power consumption model can
-then start from a set of reference values for each power managed
-region (e.g. CPU, Cluster/L2) in each state (e.g. ON, OFF) at an
-arbitrary process grade, voltage and temperature point. These values
-are then scaled for all of the following: the time in each state, the
-process grade, the current temperature and the operating voltage.
-However, since both implementation specific and complex relationships
-dominate the estimate, the appropriate interface to the model from the
-cpu cooling device is to provide a function callback that calculates
-the static power in this platform. When registering the cpu cooling
-device pass a function pointer that follows the `get_static_t`
-prototype:
-
- int plat_get_static(cpumask_t *cpumask, int interval,
- unsigned long voltage, u32 &power);
-
-`cpumask` is the cpumask of the cpus involved in the calculation.
-`voltage` is the voltage at which they are operating. The function
-should calculate the average static power for the last `interval`
-milliseconds. It returns 0 on success, -E* on error. If it
-succeeds, it should store the static power in `power`. Reading the
-temperature of the cpus described by `cpumask` is left for
-plat_get_static() to do as the platform knows best which thermal
-sensor is closest to the cpu.
-
-If `plat_static_func` is NULL, static power is considered to be
-negligible for this platform and only dynamic power is considered.
-
-The platform specific callback can then use any combination of tables
-and/or equations to permute the estimated value. Process grade
-information is not passed to the model since access to such data, from
-on-chip measurement capability or manufacture time data, is platform
-specific.
-
-Note: the significance of static power for CPUs in comparison to
-dynamic power is highly dependent on implementation. Given the
-potential complexity in implementation, the importance and accuracy of
-its inclusion when using cpu cooling devices should be assessed on a
-case by case basis.
-
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 8494a801d341..3aed560a12ee 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -42,9 +42,9 @@ as well as what protections the callback will perform and not require
ftrace to handle.
There is only one field that is needed to be set when registering
-an ftrace_ops with ftrace::
+an ftrace_ops with ftrace:
-.. code-block: c
+.. code-block:: c
struct ftrace_ops ops = {
.func = my_callback_func,
@@ -81,12 +81,12 @@ may take some time to finish.
The callback function
=====================
-The prototype of the callback function is as follows (as of v4.14)::
+The prototype of the callback function is as follows (as of v4.14):
-.. code-block: c
+.. code-block:: c
- void callback_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs);
+ void callback_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs);
@ip
This is the instruction pointer of the function that is being traced.
@@ -176,10 +176,10 @@ Filtering which functions to trace
If a callback is only to be called from specific functions, a filter must be
set up. The filters are added by name, or ip if it is known.
-.. code-block: c
+.. code-block:: c
- int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
- int len, int reset);
+ int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
+ int len, int reset);
@ops
The ops to set the filter with
@@ -202,9 +202,9 @@ See Filter Commands in :file:`Documentation/trace/ftrace.txt`.
To just trace the schedule function::
-.. code-block: c
+.. code-block:: c
- ret = ftrace_set_filter(&ops, "schedule", strlen("schedule"), 0);
+ ret = ftrace_set_filter(&ops, "schedule", strlen("schedule"), 0);
To add more functions, call the ftrace_set_filter() more than once with the
@reset parameter set to zero. To remove the current filter set and replace it
@@ -212,17 +212,17 @@ with new functions defined by @buf, have @reset be non-zero.
To remove all the filtered functions and trace all functions::
-.. code-block: c
+.. code-block:: c
- ret = ftrace_set_filter(&ops, NULL, 0, 1);
+ ret = ftrace_set_filter(&ops, NULL, 0, 1);
Sometimes more than one function has the same name. To trace just a specific
function in this case, ftrace_set_filter_ip() can be used.
-.. code-block: c
+.. code-block:: c
- ret = ftrace_set_filter_ip(&ops, ip, 0, 0);
+ ret = ftrace_set_filter_ip(&ops, ip, 0, 0);
Although the ip must be the address where the call to fentry or mcount is
located in the function. This function is used by perf and kprobes that
@@ -237,10 +237,10 @@ be called by any function.
An empty "notrace" list means to allow all functions defined by the filter
to be traced.
-.. code-block: c
+.. code-block:: c
- int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
- int len, int reset);
+ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
+ int len, int reset);
This takes the same parameters as ftrace_set_filter() but will add the
functions it finds to not be traced. This is a separate list from the
@@ -251,7 +251,7 @@ that match @buf to it.
Clearing the "notrace" list is the same as clearing the filter list
-.. code-block: c
+.. code-block:: c
ret = ftrace_set_notrace(&ops, NULL, 0, 1);
@@ -264,29 +264,29 @@ If a filter is in place, and the @reset is non-zero, and @buf contains a
matching glob to functions, the switch will happen during the time of
the ftrace_set_filter() call. At no time will all functions call the callback.
-.. code-block: c
+.. code-block:: c
- ftrace_set_filter(&ops, "schedule", strlen("schedule"), 1);
+ ftrace_set_filter(&ops, "schedule", strlen("schedule"), 1);
- register_ftrace_function(&ops);
+ register_ftrace_function(&ops);
- msleep(10);
+ msleep(10);
- ftrace_set_filter(&ops, "try_to_wake_up", strlen("try_to_wake_up"), 1);
+ ftrace_set_filter(&ops, "try_to_wake_up", strlen("try_to_wake_up"), 1);
is not the same as:
-.. code-block: c
+.. code-block:: c
- ftrace_set_filter(&ops, "schedule", strlen("schedule"), 1);
+ ftrace_set_filter(&ops, "schedule", strlen("schedule"), 1);
- register_ftrace_function(&ops);
+ register_ftrace_function(&ops);
- msleep(10);
+ msleep(10);
- ftrace_set_filter(&ops, NULL, 0, 1);
+ ftrace_set_filter(&ops, NULL, 0, 1);
- ftrace_set_filter(&ops, "try_to_wake_up", strlen("try_to_wake_up"), 0);
+ ftrace_set_filter(&ops, "try_to_wake_up", strlen("try_to_wake_up"), 0);
As the latter will have a short time where all functions will call
the callback, between the time of the reset, and the time of the
diff --git a/Documentation/usb/chipidea.txt b/Documentation/usb/chipidea.txt
index edf7cdfddc88..d1eedc01b00a 100644
--- a/Documentation/usb/chipidea.txt
+++ b/Documentation/usb/chipidea.txt
@@ -23,13 +23,13 @@ cat /sys/kernel/debug/ci_hdrc.0/registers
2) Connect 2 boards with usb cable with one end is micro A plug, the other end
is micro B plug.
- The A-device(with micro A plug inserted) should enumrate B-device.
+ The A-device(with micro A plug inserted) should enumerate B-device.
3) Role switch
On B-device:
echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
- B-device should take host role and enumrate A-device.
+ B-device should take host role and enumerate A-device.
4) A-device switch back to host.
On B-device:
@@ -40,13 +40,13 @@ cat /sys/kernel/debug/ci_hdrc.0/registers
side by answering the polling from B-Host, this can be done on A-device:
echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
- A-device should switch back to host and enumrate B-device.
+ A-device should switch back to host and enumerate B-device.
5) Remove B-device(unplug micro B plug) and insert again in 10 seconds,
- A-device should enumrate B-device again.
+ A-device should enumerate B-device again.
6) Remove B-device(unplug micro B plug) and insert again after 10 seconds,
- A-device should NOT enumrate B-device.
+ A-device should NOT enumerate B-device.
if A-device wants to use bus:
On A-device:
@@ -67,7 +67,7 @@ cat /sys/kernel/debug/ci_hdrc.0/registers
On B-device:
echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
- A-device should resume usb bus and enumrate B-device.
+ A-device should resume usb bus and enumerate B-device.
1.3 Reference document
----------------------
diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt
index 441a4b9b666f..5908a21fddb6 100644
--- a/Documentation/usb/gadget-testing.txt
+++ b/Documentation/usb/gadget-testing.txt
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
in each line. The rules stated above are best illustrated with an example:
# mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
# ln -s header/h class/fs
# ln -s header/h class/ss
# mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/usb/usbip_protocol.txt b/Documentation/usb/usbip_protocol.txt
index 16b6fe27284c..c7a0f4c7e7f1 100644
--- a/Documentation/usb/usbip_protocol.txt
+++ b/Documentation/usb/usbip_protocol.txt
@@ -274,7 +274,6 @@ USBIP_CMD_SUBMIT: Submit an URB
URB_SHORT_NOT_OK | 0x00000001 | only in | only in | only in | no
URB_ISO_ASAP | 0x00000002 | no | no | no | yes
URB_NO_TRANSFER_DMA_MAP | 0x00000004 | yes | yes | yes | yes
- URB_NO_FSBR | 0x00000020 | yes | no | no | no
URB_ZERO_PACKET | 0x00000040 | no | no | only out | no
URB_NO_INTERRUPT | 0x00000080 | yes | yes | yes | yes
URB_FREE_BUFFER | 0x00000100 | yes | yes | yes | yes
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 57d3ee9e4bde..fc3ae951bc07 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
or if no page table is present for the addresses (e.g. when using
hugepages).
+4.108 KVM_PPC_GET_CPU_CHAR
+
+Capability: KVM_CAP_PPC_GET_CPU_CHAR
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_cpu_char (out)
+Returns: 0 on successful completion
+ -EFAULT if struct kvm_ppc_cpu_char cannot be written
+
+This ioctl gives userspace information about certain characteristics
+of the CPU relating to speculative execution of instructions and
+possible information leakage resulting from speculative execution (see
+CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is
+returned in struct kvm_ppc_cpu_char, which looks like this:
+
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+For extensibility, the character_mask and behaviour_mask fields
+indicate which bits of character and behaviour have been filled in by
+the kernel. If the set of defined bits is extended in future then
+userspace will be able to tell whether it is running on a kernel that
+knows about the new bits.
+
+The character field describes attributes of the CPU which can help
+with preventing inadvertent information disclosure - specifically,
+whether there is an instruction to flash-invalidate the L1 data cache
+(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+to a mode where entries can only be used by the thread that created
+them, whether the bcctr[l] instruction prevents speculation, and
+whether a speculation barrier instruction (ori 31,31,0) is provided.
+
+The behaviour field describes actions that software should take to
+prevent inadvertent information disclosure, and thus describes which
+vulnerabilities the hardware is subject to; specifically whether the
+L1 data cache should be flushed when returning to user mode from the
+kernel, and whether a speculation barrier should be placed between an
+array bounds check and the array access.
+
+These fields use the same bit definitions as the new
+H_GET_CPU_CHARACTERISTICS hypercall.
+
5. The kvm_run structure
------------------------
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 59cbc803aad6..faf077d50d42 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -20,19 +20,20 @@ options.
The /proc/meminfo file provides information about the total number of
persistent hugetlb pages in the kernel's huge page pool. It also displays
-information about the number of free, reserved and surplus huge pages and the
-default huge page size. The huge page size is needed for generating the
-proper alignment and size of the arguments to system calls that map huge page
-regions.
+default huge page size and information about the number of free, reserved
+and surplus huge pages in the pool of huge pages of default size.
+The huge page size is needed for generating the proper alignment and
+size of the arguments to system calls that map huge page regions.
The output of "cat /proc/meminfo" will include lines like:
.....
-HugePages_Total: vvv
-HugePages_Free: www
-HugePages_Rsvd: xxx
-HugePages_Surp: yyy
-Hugepagesize: zzz kB
+HugePages_Total: uuu
+HugePages_Free: vvv
+HugePages_Rsvd: www
+HugePages_Surp: xxx
+Hugepagesize: yyy kB
+Hugetlb: zzz kB
where:
HugePages_Total is the size of the pool of huge pages.
@@ -47,6 +48,14 @@ HugePages_Surp is short for "surplus," and is the number of huge pages in
the pool above the value in /proc/sys/vm/nr_hugepages. The
maximum number of surplus huge pages is controlled by
/proc/sys/vm/nr_overcommit_hugepages.
+Hugepagesize is the default hugepage size (in Kb).
+Hugetlb is the total amount of memory (in kB), consumed by huge
+ pages of all sizes.
+ If huge pages of different sizes are in use, this number
+ will exceed HugePages_Total * Hugepagesize. To get more
+ detailed information, please, refer to
+ /sys/kernel/mm/hugepages (described below).
+
/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
in the kernel.
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index 6ae89a9edf2a..e912d7eee769 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -104,7 +104,7 @@ madvise(MADV_HWPOISON, ....)
hwpoison-inject module through debugfs
-/sys/debug/hwpoison/
+/sys/kernel/debug/hwpoison/
corrupt-pfn
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio
index af5d3b4aa851..623961d9e83f 100644
--- a/Documentation/w1/masters/w1-gpio
+++ b/Documentation/w1/masters/w1-gpio
@@ -8,17 +8,27 @@ Description
-----------
GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
-wire and the GPIO pin can be specified using platform data.
+wire and the GPIO pin can be specified using GPIO machine descriptor tables.
+It is also possible to define the master using device tree, see
+Documentation/devicetree/bindings/w1/w1-gpio.txt
Example (mach-at91)
-------------------
+#include <linux/gpio/machine.h>
#include <linux/w1-gpio.h>
+static struct gpiod_lookup_table foo_w1_gpiod_table = {
+ .dev_id = "w1-gpio",
+ .table = {
+ GPIO_LOOKUP_IDX("at91-gpio", AT91_PIN_PB20, NULL, 0,
+ GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN),
+ },
+};
+
static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
- .pin = AT91_PIN_PB20,
- .is_open_drain = 1,
+ .ext_pullup_enable_pin = -EINVAL,
};
static struct platform_device foo_w1_device = {
@@ -30,4 +40,5 @@ static struct platform_device foo_w1_device = {
...
at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
+ gpiod_add_lookup_table(&foo_w1_gpiod_table);
platform_device_register(&foo_w1_device);
diff --git a/Documentation/w1/w1.generic b/Documentation/w1/w1.generic
index b3ffaf8cfab2..c51b1ab012d0 100644
--- a/Documentation/w1/w1.generic
+++ b/Documentation/w1/w1.generic
@@ -76,7 +76,7 @@ See struct w1_bus_master definition in w1.h for details.
w1 master sysfs interface
------------------------------------------------------------------
-<xx-xxxxxxxxxxxxx> - A directory for a found device. The format is family-serial
+<xx-xxxxxxxxxxxx> - A directory for a found device. The format is family-serial
bus - (standard) symlink to the w1 bus
driver - (standard) symlink to the w1 driver
w1_master_add - (rw) manually register a slave device
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 6851854cf69d..756fd76b78a6 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -7,15 +7,24 @@ Tony Luck <tony.luck@intel.com>
Vikas Shivappa <vikas.shivappa@intel.com>
This feature is enabled by the CONFIG_INTEL_RDT Kconfig and the
-X86 /proc/cpuinfo flag bits "rdt", "cqm", "cat_l3" and "cdp_l3".
+X86 /proc/cpuinfo flag bits:
+RDT (Resource Director Technology) Allocation - "rdt_a"
+CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
+CDP (Code and Data Prioritization ) - "cdp_l3", "cdp_l2"
+CQM (Cache QoS Monitoring) - "cqm_llc", "cqm_occup_llc"
+MBM (Memory Bandwidth Monitoring) - "cqm_mbm_total", "cqm_mbm_local"
+MBA (Memory Bandwidth Allocation) - "mba"
To use the feature mount the file system:
- # mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl
+ # mount -t resctrl resctrl [-o cdp[,cdpl2]] /sys/fs/resctrl
mount options are:
"cdp": Enable code/data prioritization in L3 cache allocations.
+"cdpl2": Enable code/data prioritization in L2 cache allocations.
+
+L2 and L3 CDP are controlled seperately.
RDT features are orthogonal. A particular system may support only
monitoring, only control, or both monitoring and control.
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 000000000000..5cd58439ad2d
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. A "trampoline" must be used for SYSCALL entry. This
+ trampoline depends on a smaller set of resources than the
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+ c. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index ad41b3813f0a..ea91cb61a602 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,8 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@@ -37,13 +38,15 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -67,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt
index 5de8715d5bec..318114de63f3 100644
--- a/Documentation/xtensa/mmu.txt
+++ b/Documentation/xtensa/mmu.txt
@@ -69,19 +69,10 @@ Default MMUv2-compatible layout.
| Userspace | 0x00000000 TASK_SIZE
+------------------+ 0x40000000
+------------------+
-| Page table | 0x80000000
-+------------------+ 0x80400000
+| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0xbffff000
+| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
++------------------+ 0x8e400000
+------------------+
| VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
+------------------+ VMALLOC_END
@@ -92,6 +83,17 @@ Default MMUv2-compatible layout.
| remap area 2 |
+------------------+
+------------------+
+| KMAP area | PKMAP_BASE PTRS_PER_PTE *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
+| | (4MB * DCACHE_N_COLORS)
++------------------+
+| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+| | NR_CPUS *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
++------------------+ FIXADDR_TOP 0xcffff000
++------------------+
| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xd0000000 128MB
+------------------+
| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xd8000000 128MB
@@ -109,19 +111,10 @@ Default MMUv2-compatible layout.
| Userspace | 0x00000000 TASK_SIZE
+------------------+ 0x40000000
+------------------+
-| Page table | 0x80000000
-+------------------+ 0x80400000
+| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0x9ffff000
+| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
++------------------+ 0x8e400000
+------------------+
| VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
+------------------+ VMALLOC_END
@@ -132,6 +125,17 @@ Default MMUv2-compatible layout.
| remap area 2 |
+------------------+
+------------------+
+| KMAP area | PKMAP_BASE PTRS_PER_PTE *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
+| | (4MB * DCACHE_N_COLORS)
++------------------+
+| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+| | NR_CPUS *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
++------------------+ FIXADDR_TOP 0xaffff000
++------------------+
| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xb0000000 256MB
+------------------+
| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 256MB
@@ -150,19 +154,10 @@ Default MMUv2-compatible layout.
| Userspace | 0x00000000 TASK_SIZE
+------------------+ 0x40000000
+------------------+
-| Page table | 0x80000000
-+------------------+ 0x80400000
+| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0x8ffff000
+| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
++------------------+ 0x8e400000
+------------------+
| VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
+------------------+ VMALLOC_END
@@ -173,6 +168,17 @@ Default MMUv2-compatible layout.
| remap area 2 |
+------------------+
+------------------+
+| KMAP area | PKMAP_BASE PTRS_PER_PTE *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
+| | (4MB * DCACHE_N_COLORS)
++------------------+
+| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+| | NR_CPUS *
+| | DCACHE_N_COLORS *
+| | PAGE_SIZE
++------------------+ FIXADDR_TOP 0x9ffff000
++------------------+
| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xa0000000 512MB
+------------------+
| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 512MB